// s4_server/service.rs
1//! `s3s::S3` 実装 — `s3s_aws::Proxy` への delegation を default にしつつ、
2//! `put_object` / `get_object` 経路で `s4_codec::CodecRegistry` を呼ぶ。
3//!
4//! ## カバー範囲 (Phase 1 月 2)
5//!
6//! - 圧縮 hook あり: `put_object`, `get_object`
7//! - 純 delegation (圧縮なし): `head_bucket`, `list_buckets`, `create_bucket`, `delete_bucket`,
8//!   `head_object`, `delete_object`, `delete_objects`, `copy_object`, `list_objects`,
9//!   `list_objects_v2`, `create_multipart_upload`, `upload_part`,
10//!   `complete_multipart_upload`, `abort_multipart_upload`, `list_multipart_uploads`,
11//!   `list_parts`
12//! - 未対応 (デフォルトで NotImplemented): その他 80+ ops (Tagging / ACL / Lifecycle 等は Phase 2)
13//!
14//! ## アーキテクチャ
15//!
16//! - `S4Service<B>` は backend (B: S3) と `Arc<CodecRegistry>` と `Arc<dyn CodecDispatcher>`
17//!   を保持する。`CodecRegistry` 経由で複数 codec を抱えられるので、ひとつの S4 インスタンスが
18//!   複数 codec で書かれた object を透過的に GET できる
19//! - PUT: dispatcher が body の先頭 sample から codec を選び、registry で compress、
20//!   manifest を S3 metadata に書いて backend に forward
21//! - GET: backend から取得 → metadata から manifest を復元 → registry.decompress で
22//!   manifest 指定の codec で解凍 → 元の bytes を return
23//!
24//! ## 既知の制限事項
25//!
26//! - **Multipart Upload は per-part 圧縮が未実装**: 現状は upload_part を素通し。
27//!   Phase 1 月 2 後半で per-part compress + complete_multipart_upload で manifest 集約。
28//! - **PUT body は memory に collect**: max_body_bytes 上限あり (default 5 GiB = S3 単発 PUT 上限)。
29//!   Streaming-aware 圧縮は Phase 2。
30
31use std::sync::Arc;
32
33use base64::Engine as _;
34use bytes::BytesMut;
35use s3s::dto::*;
36use s3s::{S3, S3Error, S3ErrorCode, S3Request, S3Response, S3Result};
37use s4_codec::index::{FrameIndex, build_index_from_body, decode_index, encode_index, sidecar_key};
38use s4_codec::multipart::{
39    FRAME_HEADER_BYTES, FrameHeader, FrameIter, S3_MULTIPART_MIN_PART_BYTES, pad_to_minimum,
40    write_frame,
41};
42use s4_codec::{ChunkManifest, CodecDispatcher, CodecKind, CodecRegistry, CompressTelemetry};
43use std::time::Instant;
44use tracing::{debug, info};
45
46use crate::blob::{
47    bytes_to_blob, chain_sample_with_rest, collect_blob, collect_with_sample, peek_sample,
48};
49use crate::streaming::{
50    Crc32cVerifyingReader, async_read_to_blob, blob_to_async_read, cpu_zstd_decompress_stream,
51    pick_chunk_size, streaming_compress_to_frames, supports_streaming_compress,
52    supports_streaming_decompress,
53};
54
/// Maximum number of bytes from the head of a PUT body that is handed to
/// the codec dispatcher for sampling (codec selection).
const SAMPLE_BYTES: usize = 4096;
57
58/// v0.8 #55: stamp the GPU pipeline metrics (`s4_gpu_compress_seconds`,
59/// `s4_gpu_throughput_bytes_per_sec`, `s4_gpu_oom_total`) from a
60/// `CompressTelemetry` returned by `CodecRegistry::compress_with_telemetry`.
61/// CPU codecs (`gpu_seconds = None`) are no-ops here — they're already
62/// covered by the existing `s4_request_latency_seconds` / `s4_bytes_*`
63/// counters in the request-level `record_put` / `record_get` calls.
64#[inline]
65fn stamp_gpu_compress_telemetry(tel: &CompressTelemetry) {
66    if let Some(secs) = tel.gpu_seconds {
67        crate::metrics::record_gpu_compress(tel.codec, secs, tel.bytes_in, tel.bytes_out);
68    }
69    if tel.oom {
70        crate::metrics::record_gpu_oom(tel.codec);
71    }
72}
73
74/// v0.7 #49: percent-encoding set covering everything that is **not** an
75/// `unreserved` character per RFC 3986 §2.3, **plus** we additionally
76/// encode the path-reserved sub-delims that `http::Uri` rejects in a
77/// path segment (`?`, `#`, `%`, control bytes, space, etc.). We
78/// deliberately keep `/` un-encoded because S3 keys legally use `/` as
79/// a logical separator and the rest of the synthetic URI relies on the
80/// path layout `/{bucket}/{key}` round-tripping byte-for-byte.
81const URI_KEY_ENCODE_SET: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS
82    .add(b' ')
83    .add(b'"')
84    .add(b'#')
85    .add(b'<')
86    .add(b'>')
87    .add(b'?')
88    .add(b'`')
89    .add(b'{')
90    .add(b'}')
91    .add(b'|')
92    .add(b'\\')
93    .add(b'^')
94    .add(b'[')
95    .add(b']')
96    .add(b'%');
97
98/// v0.7 #49: build the synthetic `/{bucket}/{key}` request URI used by
99/// the sidecar / replication helpers when they re-enter the backend
100/// trait without going through the HTTP layer. S3 object keys can
101/// contain spaces, control bytes, and arbitrary Unicode that would
102/// make `format!(...).parse::<http::Uri>()` panic; we percent-encode
103/// the key bytes (RFC 3986 path segment) and the bucket name (defensive
104/// — bucket names are normally DNS-safe, but the helper is the single
105/// choke-point) before splicing them in. If the encoded form *still*
106/// fails to parse (extremely unlikely once everything outside the
107/// unreserved set is escaped) we surface a typed `400 InvalidObjectName`
108/// instead of crashing the worker.
109pub(crate) fn safe_object_uri(bucket: &str, key: &str) -> S3Result<http::Uri> {
110    use percent_encoding::utf8_percent_encode;
111    let bucket_enc = utf8_percent_encode(bucket, URI_KEY_ENCODE_SET);
112    let key_enc = utf8_percent_encode(key, URI_KEY_ENCODE_SET);
113    let raw = format!("/{bucket_enc}/{key_enc}");
114    raw.parse::<http::Uri>().map_err(|e| {
115        // S3 spec uses `InvalidObjectName` (HTTP 400) for keys that
116        // can't be represented in a request URI. The generated
117        // `S3ErrorCode` enum doesn't expose a typed variant for it,
118        // so we round-trip through `from_bytes` which preserves the
119        // canonical wire string while falling back to InvalidArgument
120        // if even that lookup fails (cannot happen at runtime — kept
121        // as a belt-and-suspenders branch so this helper never
122        // panics).
123        let code =
124            S3ErrorCode::from_bytes(b"InvalidObjectName").unwrap_or(S3ErrorCode::InvalidArgument);
125        S3Error::with_message(
126            code,
127            format!("object key cannot be encoded as a request URI: {e}"),
128        )
129    })
130}
131
/// v0.4 #20: captured at the start of a handler, before the request is
/// consumed by the backend call, so the matching `record_access` at
/// end-of-request can fill in the structured access log entry.
struct AccessLogPreamble {
    // Client IP as seen by the listener, when available.
    remote_ip: Option<String>,
    // Authenticated principal (access key / account), when available.
    requester: Option<String>,
    // Original request URI (path + query) for the log line.
    request_uri: String,
    // `User-Agent` header value, when the client sent one.
    user_agent: Option<String>,
}
141
142pub struct S4Service<B: S3> {
143    /// Wrapped in `Arc` so the v0.6 #40 cross-bucket replication
144    /// dispatcher can clone it into a detached `tokio::spawn` task
145    /// (Arc::clone is cheap; backend trait methods take `&self` so no
146    /// other handler is affected by the indirection).
147    backend: Arc<B>,
148    registry: Arc<CodecRegistry>,
149    dispatcher: Arc<dyn CodecDispatcher>,
150    max_body_bytes: usize,
151    policy: Option<crate::policy::SharedPolicy>,
152    /// v0.3 #13: surfaced as the `aws:SecureTransport` Condition key. Set
153    /// to `true` when the listener is wrapped in TLS (or ACME), so policies
154    /// gating "deny if not over TLS" can do their job. Defaults to `false`
155    /// (HTTP); set via [`S4Service::with_secure_transport`] at boot.
156    secure_transport: bool,
157    /// v0.4 #19: optional per-(principal, bucket) token-bucket limiter.
158    rate_limits: Option<crate::rate_limit::SharedRateLimits>,
159    /// v0.4 #20: optional S3-style access log emitter.
160    access_log: Option<crate::access_log::SharedAccessLog>,
161    /// v0.4 #21 / v0.5 #29: optional server-side encryption keyring
162    /// (AES-256-GCM). When set, every PUT body gets wrapped in S4E2
163    /// (with the keyring's active key id) after the compress + framing
164    /// steps; every GET that sniffs as S4E1/S4E2 is decrypted before
165    /// frame parsing. A `with_sse_key(...)` call wraps the supplied
166    /// key in a 1-slot keyring so single-key (v0.4) operators get the
167    /// same behaviour they had before, just on the v2 frame.
168    sse_keyring: Option<crate::sse::SharedSseKeyring>,
169    /// v0.5 #34: optional first-class versioning state machine. When
170    /// `Some(...)`, S4-server itself owns the per-bucket versioning
171    /// state + per-(bucket, key) version chain; PUT / GET / DELETE /
172    /// list_object_versions / get_bucket_versioning /
173    /// put_bucket_versioning handlers consult the manager instead of
174    /// passing through. When `None` (default), the legacy
175    /// backend-passthrough behaviour applies so existing v0.4
176    /// deployments are unaffected until they explicitly call
177    /// `with_versioning(...)`.
178    versioning: Option<Arc<crate::versioning::VersioningManager>>,
179    /// v0.5 #28: optional SSE-KMS envelope-encryption backend. When
180    /// `Some(...)`, PUTs carrying `x-amz-server-side-encryption: aws:kms`
181    /// generate a fresh DEK via the backend, encrypt the body with it
182    /// (S4E4 frame), and persist only the wrapped DEK. GETs sniffing as
183    /// S4E4 unwrap the DEK through the same backend before decrypt.
184    /// `kms_default_key_id` is used when the request omits an explicit
185    /// `x-amz-server-side-encryption-aws-kms-key-id` (mirrors AWS S3
186    /// bucket-default behaviour).
187    kms: Option<Arc<dyn crate::kms::KmsBackend>>,
188    kms_default_key_id: Option<String>,
189    /// v0.5 #30: optional Object Lock (WORM) enforcement layer. When
190    /// `Some(...)`, `delete_object` and overwrite-style `put_object`
191    /// consult the manager and refuse the operation with HTTP 403
192    /// `AccessDenied` while the object is locked (Compliance until
193    /// expiry, Governance unless the bypass header is set, or any time
194    /// a legal hold is on). PUT also auto-applies the bucket-default
195    /// retention to brand-new objects when configured. When `None`
196    /// (default), the legacy backend-passthrough behaviour applies, so
197    /// existing v0.4 deployments are unaffected until they explicitly
198    /// call `with_object_lock(...)`.
199    object_lock: Option<Arc<crate::object_lock::ObjectLockManager>>,
200    /// v0.6 #38: optional first-class CORS bucket configuration manager.
201    /// When `Some(...)`, S4-server itself owns per-bucket CORS rules and
202    /// `put_bucket_cors` / `get_bucket_cors` / `delete_bucket_cors`
203    /// consult the manager instead of passing through to the backend.
204    /// `handle_preflight` (public method on `S4Service`) routes OPTIONS-
205    /// style preflight matching through the same store; the actual HTTP
206    /// OPTIONS routing wire-up at the listener level is a follow-up
207    /// (s3s framework does not surface OPTIONS as a typed handler).
208    cors: Option<Arc<crate::cors::CorsManager>>,
209    /// v0.6 #36: optional first-class S3 Inventory manager. When
210    /// `Some(...)`, S4-server itself owns per-(bucket, id) inventory
211    /// configurations and `put_bucket_inventory_configuration` /
212    /// `get_bucket_inventory_configuration` /
213    /// `list_bucket_inventory_configurations` /
214    /// `delete_bucket_inventory_configuration` consult the manager
215    /// instead of passing through to the backend. The actual periodic
216    /// CSV emission is driven by a tokio task in `main.rs` that calls
217    /// `InventoryManager::run_once_for_test` on a fixed cadence; the
218    /// service handlers below only deal with config-level CRUD.
219    inventory: Option<Arc<crate::inventory::InventoryManager>>,
220    /// v0.6 #35: optional first-class S3 bucket-notification manager.
221    /// When `Some(...)`, S4-server itself owns per-bucket notification
222    /// configurations and `put_bucket_notification_configuration` /
223    /// `get_bucket_notification_configuration` consult the manager
224    /// instead of passing through to the backend. Successful PUT /
225    /// DELETE handlers fire matching destinations on a detached tokio
226    /// task (best-effort; see `crate::notifications::dispatch_event`).
227    notifications: Option<Arc<crate::notifications::NotificationManager>>,
228    /// v0.6 #37: optional first-class S3 Lifecycle configuration
229    /// manager. When `Some(...)`, S4-server itself owns per-bucket
230    /// lifecycle rules and `put_bucket_lifecycle_configuration` /
231    /// `get_bucket_lifecycle_configuration` /
232    /// `delete_bucket_lifecycle` consult the manager instead of
233    /// passing through to the backend. The actual background scanner
234    /// (list_objects_v2 -> evaluate -> delete / metadata-rewrite per
235    /// rule) is a v0.7+ follow-up; the test path
236    /// `S4Service::run_lifecycle_once_for_test` exercises the
237    /// evaluator end-to-end so this v0.6 #37 wiring is enough to ship
238    /// the configuration-management half without putting a
239    /// half-wired bucket-walk in front of users.
240    lifecycle: Option<Arc<crate::lifecycle::LifecycleManager>>,
241    /// v0.6 #39: optional first-class object + bucket Tagging manager.
242    /// When `Some(...)`, S4-server itself owns per-(bucket, key) and
243    /// per-bucket tag state — `PutObjectTagging` /
244    /// `GetObjectTagging` / `DeleteObjectTagging` /
245    /// `PutBucketTagging` / `GetBucketTagging` /
246    /// `DeleteBucketTagging` route through the manager (replacing the
247    /// previous backend-passthrough behaviour). `put_object` also
248    /// pre-parses the `x-amz-tagging` header / `Tagging` input field
249    /// so the IAM policy evaluator can gate on
250    /// `s3:RequestObjectTag/<key>` and `s3:ExistingObjectTag/<key>`.
251    /// On a successful PUT the parsed tags are persisted; on a
252    /// successful DELETE the matching tag entry is dropped.
253    tagging: Option<Arc<crate::tagging::TagManager>>,
254    /// v0.6 #40: optional first-class cross-bucket replication manager.
255    /// When `Some(...)`, S4-server itself owns per-bucket replication
256    /// rules; `PutBucketReplication` / `GetBucketReplication` /
257    /// `DeleteBucketReplication` route through the manager (replacing
258    /// the previous backend-passthrough behaviour). On every successful
259    /// `put_object` the manager's rule list is consulted; the
260    /// highest-priority matching enabled rule wins, the per-key status
261    /// is recorded as `Pending`, and the source body and metadata are
262    /// handed to a detached tokio task that PUTs to the destination
263    /// bucket through the same backend. The replica is stamped with
264    /// `x-amz-replication-status: REPLICA` in its metadata; the
265    /// source-side status is updated to `Completed` on success or
266    /// `Failed` after the 3-attempt retry budget is exhausted (drop
267    /// counter bumps in either-side case so dashboards see the loss).
268    /// `head_object` / `get_object` echo the recorded status back as
269    /// `x-amz-replication-status` so consumers can poll progress.
270    /// Limited to single-instance (same `S4Service`) replication; true
271    /// cross-region (multi-instance) is a v0.7+ follow-up.
272    replication: Option<Arc<crate::replication::ReplicationManager>>,
273    /// v0.6 #42: optional MFA-Delete enforcement layer. When `Some(...)`,
274    /// every DELETE / DELETE-version / delete-marker / `PutBucketVersioning`
275    /// request against a bucket whose MFA-Delete state is `Enabled`
276    /// must carry `x-amz-mfa: <serial> <code>` (RFC 6238 6-digit TOTP);
277    /// missing or invalid tokens return HTTP 403 `AccessDenied`. When
278    /// `None` (default), the gate is a no-op so existing v0.4 / v0.5
279    /// deployments are unaffected until they explicitly call
280    /// `with_mfa_delete(...)`.
281    mfa_delete: Option<Arc<crate::mfa::MfaDeleteManager>>,
282    /// v0.5 #32: when `true`, every PUT must carry an SSE indicator
283    /// (`x-amz-server-side-encryption`, the SSE-C customer-key headers,
284    /// or be matched against a configured server-managed keyring/KMS).
285    /// Set by `--compliance-mode strict` after the boot-time
286    /// prerequisite check passes.
287    compliance_strict: bool,
288    /// v0.7 #47: optional SigV4a (asymmetric ECDSA-P256-SHA256) verify
289    /// gate. When `Some(...)`, the listener-side middleware (see
290    /// [`crate::routing::try_sigv4a_verify`]) inspects every incoming
291    /// request and short-circuits SigV4a-signed ones — verifying the
292    /// signature against the credential store and returning 403
293    /// `SignatureDoesNotMatch` / `InvalidAccessKeyId` on failure. Plain
294    /// SigV4 (HMAC-SHA256) requests pass through to s3s untouched. When
295    /// `None`, the middleware is a no-op so the existing SigV4 path is
296    /// unaffected (operators opt in via `--sigv4a-credentials <DIR>`).
297    sigv4a_gate: Option<Arc<SigV4aGate>>,
298    /// v0.8 #54 BUG-5..10: per-`upload_id` side-table that ferries the
299    /// SSE / Tagging / Object-Lock context captured at
300    /// `CreateMultipartUpload` time through to `UploadPart` /
301    /// `CompleteMultipartUpload`. Always-on (no `with_*` flag) — the
302    /// store is gateway-internal and idle when no multipart is in
303    /// flight. See [`crate::multipart_state`] for rationale.
304    multipart_state: Arc<crate::multipart_state::MultipartStateStore>,
305    /// v0.8 #52: plaintext bytes per S4E5 chunk on the SSE-S4 PUT
306    /// path. `0` (default) → use the legacy buffered S4E2 path
307    /// (whole-body AES-GCM tag, GET buffers + verifies before
308    /// emitting). Non-zero → use the chunked S4E5 frame so GET can
309    /// stream-decrypt chunk-by-chunk. Wired by `--sse-chunk-size`
310    /// in `main.rs`. SSE-C and SSE-KMS are intentionally unaffected
311    /// (chunked variants tracked in a follow-up issue).
312    sse_chunk_size: usize,
313    /// v0.8.5 #86 (audit M-2): bounded permit pool gating the detached
314    /// replication dispatcher in [`Self::spawn_replication_if_matched`].
315    /// Without this cap, a high-volume PUT workload (1k req/s × N enabled
316    /// rules × slow destination = O(10k) in-flight tokio tasks) could
317    /// exhaust process memory before the destination drains. Each
318    /// dispatcher spawn `acquire_owned`s one permit and holds it for the
319    /// lifetime of the destination PUT + status stamp; once the cap is
320    /// reached the dispatcher async-blocks on `acquire_owned()` so the
321    /// listener path itself never stalls — only the in-flight replica
322    /// queue depth is bounded. Default 1024 (operator-tunable via
323    /// `--replication-max-concurrent`).
324    replication_semaphore: Arc<tokio::sync::Semaphore>,
325}
326
327impl<B: S3> S4Service<B> {
    /// AWS S3 single-request PUT API upper limit (5 GiB).
    ///
    /// NOTE(review): `5 * 1024 * 1024 * 1024` exceeds `u32::MAX`, so this
    /// constant would fail const-evaluation on a 32-bit target — fine for
    /// 64-bit deployments, but worth confirming if 32-bit builds are ever
    /// needed.
    pub const DEFAULT_MAX_BODY_BYTES: usize = 5 * 1024 * 1024 * 1024;

    /// v0.8.5 #86 (audit M-2): default cap on simultaneously-in-flight
    /// replication dispatcher tasks. See the `replication_semaphore`
    /// field doc for the rationale + override path.
    pub const DEFAULT_REPLICATION_MAX_CONCURRENT: usize = 1024;
335
336    pub fn new(
337        backend: B,
338        registry: Arc<CodecRegistry>,
339        dispatcher: Arc<dyn CodecDispatcher>,
340    ) -> Self {
341        Self {
342            backend: Arc::new(backend),
343            registry,
344            dispatcher,
345            max_body_bytes: Self::DEFAULT_MAX_BODY_BYTES,
346            policy: None,
347            secure_transport: false,
348            rate_limits: None,
349            access_log: None,
350            sse_keyring: None,
351            versioning: None,
352            kms: None,
353            kms_default_key_id: None,
354            object_lock: None,
355            cors: None,
356            inventory: None,
357            notifications: None,
358            lifecycle: None,
359            tagging: None,
360            replication: None,
361            mfa_delete: None,
362            compliance_strict: false,
363            sigv4a_gate: None,
364            multipart_state: Arc::new(crate::multipart_state::MultipartStateStore::new()),
365            // v0.8 #52: chunked SSE-S4 disabled by default — opt
366            // in via `S4Service::with_sse_chunk_size(...)` /
367            // `--sse-chunk-size <BYTES>`. Default keeps the legacy
368            // S4E2 buffered path so existing deployments are
369            // bit-for-bit unchanged.
370            sse_chunk_size: 0,
371            // v0.8.5 #86 (audit M-2): default cap of 1024 in-flight
372            // replication tasks. Picked to be (a) ample headroom over a
373            // typical steady-state replication rate (the v0.8.3 #66
374            // status-sweep doc cites 1k keys/hour as a "steady" rate, so
375            // even a 100x burst lands well under 1024), (b) small enough
376            // that the worst-case memory pinned by stalled dispatchers
377            // — body bytes + metadata — stays bounded (1024 × 5 MiB
378            // typical S3 PUT ≈ 5 GiB, recoverable). Operators with
379            // wider cross-region fan-out can override via
380            // `--replication-max-concurrent`.
381            replication_semaphore: Arc::new(tokio::sync::Semaphore::new(
382                Self::DEFAULT_REPLICATION_MAX_CONCURRENT,
383            )),
384        }
385    }
386
387    /// v0.7 #47: attach the SigV4a verify gate. Once set, the
388    /// listener-side middleware (`crate::routing::try_sigv4a_verify`)
389    /// short-circuits any incoming `AWS4-ECDSA-P256-SHA256` request,
390    /// verifying it against the supplied credential store and
391    /// returning 403 on failure. Plain SigV4 (HMAC-SHA256) requests
392    /// are unaffected. When the gate is unset (default), the
393    /// middleware skips entirely so existing SigV4 deployments keep
394    /// working.
395    #[must_use]
396    pub fn with_sigv4a_gate(mut self, gate: Arc<SigV4aGate>) -> Self {
397        self.sigv4a_gate = Some(gate);
398        self
399    }
400
401    /// v0.7 #47: borrow the attached SigV4a gate. Used by `main.rs`
402    /// to snapshot the gate `Arc` before the s3s `ServiceBuilder`
403    /// consumes the `S4Service` (the listener-side middleware needs
404    /// the same `Arc` because s3s' SigV4 verifier rejects SigV4a
405    /// algorithm tokens with "unknown algorithm" — match has to
406    /// happen at the hyper layer instead).
407    #[must_use]
408    pub fn sigv4a_gate(&self) -> Option<&Arc<SigV4aGate>> {
409        self.sigv4a_gate.as_ref()
410    }
411
412    /// v0.8.2 #62: borrow the multipart state store so `main.rs` can
413    /// snapshot the `Arc` before the s3s `ServiceBuilder` consumes
414    /// the `S4Service`. The background `sweep_stale` task in `main.rs`
415    /// holds this `Arc` and ticks once an hour to drop abandoned
416    /// upload contexts (and their `Zeroizing<[u8; 32]>` SSE-C keys).
417    #[must_use]
418    pub fn multipart_state(&self) -> &Arc<crate::multipart_state::MultipartStateStore> {
419        &self.multipart_state
420    }
421
422    /// v0.6 #39: attach the in-memory object + bucket Tagging manager.
423    /// Once set, `Put/Get/Delete` `Object/Bucket Tagging` route
424    /// through the manager (instead of forwarding to the backend),
425    /// and `put_object`'s `x-amz-tagging` parse path becomes the
426    /// source of `s3:RequestObjectTag/<key>` for the IAM policy
427    /// evaluator. The manager itself is shared via `Arc`.
428    #[must_use]
429    pub fn with_tagging(mut self, mgr: Arc<crate::tagging::TagManager>) -> Self {
430        self.tagging = Some(mgr);
431        self
432    }
433
434    /// v0.6 #39: borrow the attached tagging manager (test /
435    /// introspection — the snapshotter in `main.rs`, when wired,
436    /// will keep its own `Arc` clone).
437    #[must_use]
438    pub fn tag_manager(&self) -> Option<&Arc<crate::tagging::TagManager>> {
439        self.tagging.as_ref()
440    }
441
442    /// v0.6 #36: attach the in-memory S3 Inventory manager. Once set,
443    /// `put_bucket_inventory_configuration` /
444    /// `get_bucket_inventory_configuration` /
445    /// `list_bucket_inventory_configurations` /
446    /// `delete_bucket_inventory_configuration` route through the
447    /// manager. The actual periodic CSV / manifest emission is
448    /// orchestrated by a tokio task started in `main.rs`; the manager
449    /// itself is shared between the handler and the scheduler via
450    /// `Arc`.
451    #[must_use]
452    pub fn with_inventory(mut self, mgr: Arc<crate::inventory::InventoryManager>) -> Self {
453        self.inventory = Some(mgr);
454        self
455    }
456
457    /// v0.6 #36: borrow the attached inventory manager (test /
458    /// introspection — the background scheduler in `main.rs` keeps its
459    /// own `Arc` clone, so this accessor is for the test path that
460    /// invokes `run_once_for_test` directly).
461    #[must_use]
462    pub fn inventory_manager(&self) -> Option<&Arc<crate::inventory::InventoryManager>> {
463        self.inventory.as_ref()
464    }
465
466    /// v0.6 #37: attach the in-memory S3 Lifecycle configuration
467    /// manager. Once set, `put_bucket_lifecycle_configuration` /
468    /// `get_bucket_lifecycle_configuration` / `delete_bucket_lifecycle`
469    /// route through the manager (replacing the previous backend-
470    /// passthrough behaviour). The actual periodic scanner that walks
471    /// the source bucket and invokes Expiration / Transition /
472    /// NoncurrentExpiration actions is a v0.7+ follow-up — see
473    /// [`Self::run_lifecycle_once_for_test`] for the in-memory test
474    /// path that exercises the evaluator end-to-end.
475    #[must_use]
476    pub fn with_lifecycle(mut self, mgr: Arc<crate::lifecycle::LifecycleManager>) -> Self {
477        self.lifecycle = Some(mgr);
478        self
479    }
480
481    /// v0.6 #37: borrow the attached lifecycle manager (test /
482    /// introspection — the background scheduler in `main.rs` keeps its
483    /// own `Arc` clone, so this accessor is for the test path that
484    /// invokes the evaluator directly).
485    #[must_use]
486    pub fn lifecycle_manager(&self) -> Option<&Arc<crate::lifecycle::LifecycleManager>> {
487        self.lifecycle.as_ref()
488    }
489
490    /// v0.6 #37: synchronous test entry that runs the lifecycle evaluator
491    /// against a caller-provided list of `(key, age, size, tags)` tuples
492    /// and returns the `(key, action)` pairs that should fire. The actual
493    /// backend invocation (S3.delete_object / metadata rewrite) is left
494    /// to the caller — the unit + E2E tests use this to verify the
495    /// evaluator without spawning the (deferred) background scanner.
496    /// Returns an empty `Vec` when no lifecycle manager is attached or
497    /// no rule matches.
498    #[must_use]
499    pub fn run_lifecycle_once_for_test(
500        &self,
501        bucket: &str,
502        objects: &[crate::lifecycle::EvaluateBatchEntry],
503    ) -> Vec<(String, crate::lifecycle::LifecycleAction)> {
504        let Some(mgr) = self.lifecycle.as_ref() else {
505            return Vec::new();
506        };
507        crate::lifecycle::evaluate_batch(mgr, bucket, objects)
508    }
509
510    /// v0.6 #35: attach the in-memory bucket-notification manager. Once
511    /// set, `put_bucket_notification_configuration` /
512    /// `get_bucket_notification_configuration` route through the manager
513    /// (replacing the previous backend-passthrough behaviour); successful
514    /// `put_object` / `delete_object` calls fire matching destinations
515    /// on a detached tokio task via
516    /// `crate::notifications::dispatch_event` (best-effort, fire-and-
517    /// forget — failures bump the manager's `dropped_total` counter and
518    /// log at warn but do NOT fail the originating S3 request).
519    #[must_use]
520    pub fn with_notifications(
521        mut self,
522        mgr: Arc<crate::notifications::NotificationManager>,
523    ) -> Self {
524        self.notifications = Some(mgr);
525        self
526    }
527
528    /// v0.6 #35: borrow the attached notifications manager (test /
529    /// introspection — used by the metrics layer to read
530    /// `dropped_total`).
531    #[must_use]
532    pub fn notifications_manager(&self) -> Option<&Arc<crate::notifications::NotificationManager>> {
533        self.notifications.as_ref()
534    }
535
536    /// v0.6 #35: internal helper used by the DELETE handlers to fire a
537    /// matching notification on a detached tokio task. No-op when no
538    /// manager is attached or no rule on the bucket matches the given
539    /// (event, key) tuple.
540    fn fire_delete_notification(
541        &self,
542        bucket: &str,
543        key: &str,
544        event: crate::notifications::EventType,
545        version_id: Option<String>,
546    ) {
547        let Some(mgr) = self.notifications.as_ref() else {
548            return;
549        };
550        let dests = mgr.match_destinations(bucket, &event, key);
551        if dests.is_empty() {
552            return;
553        }
554        tokio::spawn(crate::notifications::dispatch_event(
555            Arc::clone(mgr),
556            bucket.to_owned(),
557            key.to_owned(),
558            event,
559            None,
560            None,
561            version_id,
562            format!("S4-{}", uuid::Uuid::new_v4()),
563        ));
564    }
565
566    /// v0.6 #40: attach the in-memory cross-bucket replication manager.
567    /// Once set, `put_bucket_replication` / `get_bucket_replication` /
568    /// `delete_bucket_replication` route through the manager (replacing
569    /// the previous backend-passthrough behaviour); a successful
570    /// `put_object` whose key matches an enabled rule fires a detached
571    /// tokio task that PUTs the same body + metadata to the rule's
572    /// destination bucket, stamping the replica with
573    /// `x-amz-replication-status: REPLICA`. Failures after the retry
574    /// budget bump the manager's `dropped_total` counter and are
575    /// surfaced in the `s4_replication_dropped_total` Prometheus
576    /// counter; successes bump `s4_replication_replicated_total`.
577    #[must_use]
578    pub fn with_replication(mut self, mgr: Arc<crate::replication::ReplicationManager>) -> Self {
579        self.replication = Some(mgr);
580        self
581    }
582
583    /// v0.6 #40: borrow the attached replication manager (test /
584    /// introspection — used by the metrics layer to read
585    /// `dropped_total`).
586    #[must_use]
587    pub fn replication_manager(&self) -> Option<&Arc<crate::replication::ReplicationManager>> {
588        self.replication.as_ref()
589    }
590
    /// v0.6 #40: internal helper used by the PUT handlers to fire a
    /// detached cross-bucket replication task. No-op when no manager
    /// is attached, the source backend PUT failed, or no rule on the
    /// source bucket matches the (key, tags) tuple. The `body` is the
    /// post-compression / post-encryption `Bytes` that was sent to
    /// the source backend (refcount-cloned), and `metadata` is the
    /// metadata map that already includes the manifest /
    /// `s4-encrypted` markers — the replica decodes through the same
    /// path. The destination PUT runs through `Arc<B>::put_object`.
    ///
    /// ## v0.8.2 #61: generation token + shadow-key destination
    ///
    /// `pending_version` is the source-side `PutOutcome` minted by the
    /// caller's versioning branch (or `None` for unversioned /
    /// suspended buckets). When `pending_version.versioned_response`
    /// is `true`, the dispatcher writes the destination under the same
    /// shadow path the source uses (`<key>.__s4ver__/<vid>`) so the
    /// destination's version chain receives the new version the same
    /// way `?versionId=` GET resolves it. Closes audit C-1.
    ///
    /// The dispatcher also mints a fresh `generation` token before
    /// spawning, threaded through to [`crate::replication::
    /// replicate_object`]. Closes audit C-3 — a stale retry of an
    /// older PUT can no longer overwrite the destination's newer bytes
    /// because the CAS guard sees the higher stored generation and
    /// drops its destination write.
    ///
    /// ## Asymmetric versioning policy (out of scope)
    ///
    /// We assume source + destination buckets share the same
    /// versioning policy (both Enabled or both Suspended /
    /// Unversioned). Cross-bucket policy queries would require a
    /// backend round-trip per replication, which is not worth it for
    /// the single-instance scope. Operators who configure asymmetric
    /// versioning will see destination-side `?versionId=` lookups
    /// miss — documented as out-of-scope until a future per-rule
    /// `destination_versioning_policy` knob lands.
    // 8 args is the post-#61 shape: replication needs the
    // source bucket+key, the canonical tag set for rule-matching,
    // the post-codec body+metadata for the destination PUT, the
    // backend-success gate, and the pending version-id for the
    // shadow-key destination override. A shape struct would just
    // split the (single) call site so opt for the inline form.
    #[allow(clippy::too_many_arguments)]
    fn spawn_replication_if_matched(
        &self,
        source_bucket: &str,
        source_key: &str,
        request_tags: &Option<crate::tagging::TagSet>,
        body: &bytes::Bytes,
        metadata: &Option<std::collections::HashMap<String, String>>,
        backend_ok: bool,
        pending_version: Option<&crate::versioning::PutOutcome>,
    ) where
        B: Send + Sync + 'static,
    {
        // Gate 1: never replicate a PUT the source backend rejected.
        if !backend_ok {
            return;
        }
        // Gate 2: feature is opt-in; absent manager = no-op.
        let Some(mgr) = self.replication.as_ref() else {
            return;
        };
        // Pull the request's tags into the (k, v) shape the matcher
        // expects. The tagging manager would have the canonical
        // post-PUT view but at this point in the pipeline it's
        // already been written above; for the rule-match decision
        // the request's tags are sufficient (= the tags this PUT
        // applies, S3 PutObject is full-replace on tags).
        let object_tags: Vec<(String, String)> = request_tags
            .as_ref()
            .map(|ts| ts.iter().cloned().collect())
            .unwrap_or_default();
        // Gate 3: only spawn when a rule matches (key, tags).
        let Some(rule) = mgr.match_rule(source_bucket, source_key, &object_tags) else {
            return;
        };
        // v0.8.2 #61: mint the per-PUT generation BEFORE the eager
        // Pending stamp so the stamp itself carries the right
        // generation (the CAS in `record_status_if_newer` would
        // otherwise see a `generation=0` Pending and accept any
        // stale retry).
        let generation = mgr.next_generation();
        // Eagerly mark the source key as Pending so a HEAD between
        // the source PUT returning and the spawned task completing
        // surfaces the in-flight state. CAS-guarded so a slower
        // older PUT can't downgrade a newer Completed back to Pending.
        let _ = mgr.record_status_if_newer(
            source_bucket,
            source_key,
            generation,
            crate::replication::ReplicationStatus::Pending,
        );
        // v0.8.2 #61: derive the destination storage key. For a
        // versioning-Enabled source the destination receives the
        // same shadow-key path so a `?versionId=<vid>` GET on the
        // destination resolves through the same lookup the source
        // uses. Suspended / Unversioned sources keep the logical
        // key (= `None` override = dispatcher uses `source_key`).
        let destination_key_override = pending_version
            .filter(|pv| pv.versioned_response)
            .map(|pv| versioned_shadow_key(source_key, &pv.version_id));
        // v0.8.3 #68 (audit M-1): capture the source object's Object
        // Lock state so the dispatcher can decorate the destination
        // PUT with the matching AWS-wire lock headers. Without this,
        // a Compliance / Governance / legal-hold protected source
        // would replicate to a destination where DELETE succeeds
        // (the WORM posture would only hold on the source).
        let source_lock_state = self
            .object_lock
            .as_ref()
            .and_then(|mgr| mgr.get(source_bucket, source_key));
        // v0.8.3 #68: hand the destination-side ObjectLockManager to
        // the dispatcher closure so we can persist the propagated
        // lock state on successful destination PUT (the destination
        // PUT below bypasses S4Service::put_object — we drive the
        // backend directly — so the explicit_lock_mode commit block
        // in put_object never fires for replicas. We replay it here
        // against the destination key.)
        let dest_lock_mgr = self.object_lock.as_ref().map(Arc::clone);
        // Own every capture the detached task needs: the borrowed
        // parameters must not outlive this call, so clone / to_owned
        // everything up front before the `async move` below.
        let mgr_cl = Arc::clone(mgr);
        let backend = Arc::clone(&self.backend);
        let body_cl = body.clone();
        let metadata_cl = metadata.clone();
        let source_bucket_cl = source_bucket.to_owned();
        let source_key_cl = source_key.to_owned();
        let source_lock_state_for_closure = source_lock_state.clone();
        let source_bucket_for_warn = source_bucket.to_owned();
        // v0.8.5 #86 (audit M-2): bound the in-flight replication queue
        // depth. Acquire happens INSIDE the spawned task (not on the
        // listener path) so a saturated semaphore back-pressures the
        // dispatcher pool without stalling the source PUT response —
        // the source has already returned 200 to the client by the time
        // the spawn body runs. A failed `acquire_owned` only happens
        // when the semaphore is closed (we never close it, so the
        // logged-and-skipped fallback is unreachable in practice).
        let semaphore = Arc::clone(&self.replication_semaphore);
        tokio::spawn(async move {
            // Permit is held (via `_permit`) until the task finishes.
            let _permit = match semaphore.acquire_owned().await {
                Ok(p) => p,
                Err(e) => {
                    tracing::warn!(
                        bucket = %source_bucket_cl,
                        key = %source_key_cl,
                        "S4 replication dispatcher could not acquire semaphore permit (closed? {e}); skipping replica"
                    );
                    return;
                }
            };
            // Destination-PUT driver handed to `replicate_object`:
            // each invocation builds a synthetic PutObject S3Request
            // and drives the backend directly.
            let do_put = move |dest_bucket: String,
                               dest_key: String,
                               dest_body: bytes::Bytes,
                               dest_meta: Option<std::collections::HashMap<String, String>>| {
                let backend = Arc::clone(&backend);
                let dest_lock_mgr = dest_lock_mgr.clone();
                let lock_state = source_lock_state_for_closure.clone();
                let warn_src = source_bucket_for_warn.clone();
                async move {
                    let req = S3Request {
                        input: PutObjectInput {
                            bucket: dest_bucket.clone(),
                            key: dest_key.clone(),
                            body: Some(bytes_to_blob(dest_body)),
                            metadata: dest_meta,
                            ..Default::default()
                        },
                        method: http::Method::PUT,
                        uri: "/".parse().unwrap(),
                        headers: http::HeaderMap::new(),
                        extensions: http::Extensions::new(),
                        credentials: None,
                        region: None,
                        service: None,
                        trailing_headers: None,
                    };
                    let put_result = backend
                        .put_object(req)
                        .await
                        .map(|_| ())
                        .map_err(|e| format!("destination put_object: {e}"));
                    // v0.8.3 #68: on successful destination PUT,
                    // persist the propagated lock state into the
                    // destination's ObjectLockManager so a subsequent
                    // DELETE on the destination is refused. Three cases:
                    //   - PUT failed     → skip (no replica to protect)
                    //   - lock_state None → nothing to propagate
                    //   - dest manager None (operator misconfig)
                    //                     → log warn-once + bump skip metric
                    if put_result.is_ok()
                        && let Some(state) = lock_state
                    {
                        match dest_lock_mgr {
                            Some(ref mgr) => {
                                mgr.set(&dest_bucket, &dest_key, state);
                            }
                            None => {
                                crate::replication::warn_lock_propagation_skipped(
                                    &warn_src,
                                    &dest_bucket,
                                );
                            }
                        }
                    }
                    put_result
                }
            };
            // v0.8.5 #81 (audit H-7): wrap the dispatcher body in
            // `futures::FutureExt::catch_unwind` so a panic inside
            // `replicate_object` (or any of the user-supplied closures
            // it drives — `do_put`, the destination backend, the lock
            // manager) does NOT bubble out of the detached task as a
            // `JoinError` that no operator dashboard scrapes. Caught
            // panics bump `s4_dispatcher_panics_total{kind="replication"}`
            // + log at ERROR with the panic payload, so silent feature
            // degradation (= every replication PUT panicking and
            // dropping the replica without any visible signal) becomes
            // a first-class metric the operator can alert on.
            //
            // `AssertUnwindSafe` is required because the inner future
            // captures `Arc<...>` clones + a `do_put` closure that are
            // not `UnwindSafe` by default; the safety contract here is
            // "we don't continue using any of those captures after the
            // panic" which trivially holds (we drop them and return).
            use futures::FutureExt as _;
            let dispatcher_kind = "replication";
            let fut = crate::replication::replicate_object(
                rule,
                source_bucket_cl,
                source_key_cl,
                body_cl,
                metadata_cl,
                do_put,
                mgr_cl,
                generation,
                destination_key_override,
                source_lock_state,
            );
            if let Err(panic) = std::panic::AssertUnwindSafe(fut).catch_unwind().await {
                // Panic payloads are usually `&'static str` (panic!("..."))
                // or `String` (panic!("{..}")); anything else is opaque.
                let panic_msg = panic
                    .downcast_ref::<&'static str>()
                    .copied()
                    .map(str::to_owned)
                    .or_else(|| panic.downcast_ref::<String>().cloned())
                    .unwrap_or_else(|| "(non-string panic payload)".to_owned());
                tracing::error!(
                    kind = dispatcher_kind,
                    panic_payload = %panic_msg,
                    "S4 dispatcher task panicked (caught by catch_unwind, runtime not poisoned)"
                );
                crate::metrics::record_dispatcher_panic(dispatcher_kind);
            }
        });
    }
842
843    /// v0.6 #42: attach the in-memory MFA-Delete enforcement manager.
844    /// Once set, every DELETE / DELETE-version / delete-marker /
845    /// `PutBucketVersioning` request against a bucket whose MFA-Delete
846    /// state is `Enabled` requires a valid `x-amz-mfa: <serial> <code>`
847    /// header (RFC 6238 6-digit TOTP); the gate is a no-op for buckets
848    /// where MFA-Delete is `Disabled` (S3 default).
849    #[must_use]
850    pub fn with_mfa_delete(mut self, mgr: Arc<crate::mfa::MfaDeleteManager>) -> Self {
851        self.mfa_delete = Some(mgr);
852        self
853    }
854
855    /// v0.6 #42: borrow the attached MFA-Delete manager (test /
856    /// introspection — used by the snapshot path in `main.rs` to call
857    /// `to_json` for restart-recoverable state).
858    #[must_use]
859    pub fn mfa_delete_manager(&self) -> Option<&Arc<crate::mfa::MfaDeleteManager>> {
860        self.mfa_delete.as_ref()
861    }
862
863    /// v0.6 #38: attach the in-memory CORS configuration manager. Once
864    /// set, `put_bucket_cors` / `get_bucket_cors` / `delete_bucket_cors`
865    /// route through the manager instead of forwarding to the backend,
866    /// and [`Self::handle_preflight`] becomes useful for the (future)
867    /// listener-side OPTIONS interceptor.
868    #[must_use]
869    pub fn with_cors(mut self, mgr: Arc<crate::cors::CorsManager>) -> Self {
870        self.cors = Some(mgr);
871        self
872    }
873
874    /// v0.6 #38: Borrow the attached CORS manager (test / introspection).
875    #[must_use]
876    pub fn cors_manager(&self) -> Option<&Arc<crate::cors::CorsManager>> {
877        self.cors.as_ref()
878    }
879
880    /// v0.6 #38: evaluate a CORS preflight request against the bucket's
881    /// configured rules and, if a rule matches, return the headers that
882    /// the (future) listener-side OPTIONS interceptor must put on the
883    /// 200 response: `Access-Control-Allow-Origin`, `Access-Control-
884    /// Allow-Methods`, `Access-Control-Allow-Headers`, optionally
885    /// `Access-Control-Max-Age` and `Access-Control-Expose-Headers`.
886    ///
887    /// Returns `None` when no manager is attached, no config is
888    /// registered for the bucket, or no rule matches the (origin,
889    /// method, headers) triple. The caller is responsible for turning
890    /// `None` into the appropriate 403 response.
891    ///
892    /// **Note:** the OPTIONS routing itself (i.e. wiring this method
893    /// into the hyper-util listener path) is a follow-up — s3s does not
894    /// surface OPTIONS as a typed S3 handler, so this method is
895    /// currently call-able only from inside other handlers and tests.
896    #[must_use]
897    pub fn handle_preflight(
898        &self,
899        bucket: &str,
900        origin: &str,
901        method: &str,
902        request_headers: &[String],
903    ) -> Option<std::collections::HashMap<String, String>> {
904        let mgr = self.cors.as_ref()?;
905        let rule = mgr.match_preflight(bucket, origin, method, request_headers)?;
906        let mut h = std::collections::HashMap::new();
907        // Echo the matched origin back. If the rule used "*" we still
908        // echo "*" (S3 spec — the spec does not require us to echo the
909        // *requesting* origin when the wildcard matched).
910        let allow_origin = if rule.allowed_origins.iter().any(|o| o == "*") {
911            "*".to_string()
912        } else {
913            origin.to_string()
914        };
915        h.insert("Access-Control-Allow-Origin".to_string(), allow_origin);
916        h.insert(
917            "Access-Control-Allow-Methods".to_string(),
918            rule.allowed_methods.join(", "),
919        );
920        if !rule.allowed_headers.is_empty() {
921            // For the Allow-Headers response, echo back the rule's
922            // pattern list verbatim (S3 echoes the configured list,
923            // including "*" if present). Browsers honour exact-match
924            // rules.
925            h.insert(
926                "Access-Control-Allow-Headers".to_string(),
927                rule.allowed_headers.join(", "),
928            );
929        }
930        if let Some(secs) = rule.max_age_seconds {
931            h.insert("Access-Control-Max-Age".to_string(), secs.to_string());
932        }
933        if !rule.expose_headers.is_empty() {
934            h.insert(
935                "Access-Control-Expose-Headers".to_string(),
936                rule.expose_headers.join(", "),
937            );
938        }
939        Some(h)
940    }
941
942    /// v0.5 #32: enable strict compliance mode. Every PUT must carry an
943    /// SSE indicator (server-side encryption header or SSE-C customer
944    /// key); requests without one are rejected with 400 InvalidRequest.
945    /// Boot-time prerequisite checking lives in the binary
946    /// (`validate_compliance_mode`) so this flag is purely the runtime
947    /// switch.
948    #[must_use]
949    pub fn with_compliance_strict(mut self, on: bool) -> Self {
950        self.compliance_strict = on;
951        self
952    }
953
954    /// v0.5 #30: attach the in-memory Object Lock (WORM) enforcement
955    /// manager. Once set, `delete_object` and overwrite-path
956    /// `put_object` refuse operations on locked keys with HTTP 403
957    /// `AccessDenied`; new PUTs to a bucket with a default retention
958    /// policy auto-create per-object lock state.
959    #[must_use]
960    pub fn with_object_lock(mut self, mgr: Arc<crate::object_lock::ObjectLockManager>) -> Self {
961        self.object_lock = Some(mgr);
962        self
963    }
964
965    /// v0.7 #45: borrow the attached Object Lock manager (read-only —
966    /// the lifecycle scanner uses this to skip currently-locked objects
967    /// before issuing `delete_object`, since an Object Lock always wins
968    /// over Lifecycle Expiration in AWS S3 semantics). Mirrors the
969    /// shape of [`Self::lifecycle_manager`] /
970    /// [`Self::tag_manager`] — purely additive accessor, no handler
971    /// behaviour change.
972    #[must_use]
973    pub fn object_lock_manager(&self) -> Option<&Arc<crate::object_lock::ObjectLockManager>> {
974        self.object_lock.as_ref()
975    }
976
977    /// v0.5 #28: attach an SSE-KMS backend. `default_key_id` is used
978    /// when a PUT requests SSE-KMS without naming a specific KMS key
979    /// (operators set this to mirror AWS S3's bucket-default key).
980    #[must_use]
981    pub fn with_kms_backend(
982        mut self,
983        kms: Arc<dyn crate::kms::KmsBackend>,
984        default_key_id: Option<String>,
985    ) -> Self {
986        self.kms = Some(kms);
987        self.kms_default_key_id = default_key_id;
988        self
989    }
990
991    /// v0.5 #34: attach the first-class versioning state machine. Once
992    /// set, this `S4Service` owns the per-bucket versioning state +
993    /// per-(bucket, key) version chain; `put_object` / `get_object` /
994    /// `delete_object` / `list_object_versions` /
995    /// `get_bucket_versioning` / `put_bucket_versioning` consult the
996    /// manager instead of passing through to the backend. The backend
997    /// is still used as the byte store: Suspended / Unversioned buckets
998    /// keep using `<key>` directly (legacy), Enabled buckets redirect
999    /// each version's bytes to a shadow key
1000    /// (`<key>.__s4ver__/<version-id>`) so older versions survive newer
1001    /// PUTs to the same logical key.
1002    #[must_use]
1003    pub fn with_versioning(mut self, mgr: Arc<crate::versioning::VersioningManager>) -> Self {
1004        self.versioning = Some(mgr);
1005        self
1006    }
1007
1008    /// v0.8.5 #86 (audit M-3): borrow the attached versioning manager so
1009    /// the SIGUSR1 snapshot dump-back hook in `main.rs` can re-emit the
1010    /// in-memory state to the operator's `--versioning-state-file`
1011    /// without restarting the gateway. Mirrors the shape of
1012    /// [`Self::object_lock_manager`] / [`Self::lifecycle_manager`] —
1013    /// purely additive accessor, no handler behaviour change.
1014    #[must_use]
1015    pub fn versioning_manager(&self) -> Option<&Arc<crate::versioning::VersioningManager>> {
1016        self.versioning.as_ref()
1017    }
1018
1019    /// v0.8.5 #86 (audit M-2): override the default replication-dispatch
1020    /// concurrency cap (1024). Wired by the `--replication-max-concurrent`
1021    /// CLI flag in `main.rs`. Operators running heavy cross-region
1022    /// fan-out may need to raise this; operators on memory-constrained
1023    /// hosts may need to lower it. The new value replaces the existing
1024    /// `Semaphore` (so calling this after dispatchers are already in
1025    /// flight is fine — the in-flight tasks hold permits from the old
1026    /// semaphore which is dropped when its last permit is released).
1027    /// A `max` of 0 would deadlock all replicas; the value is silently
1028    /// clamped to 1 instead.
1029    #[must_use]
1030    pub fn with_replication_max_concurrent(mut self, max: usize) -> Self {
1031        let max = max.max(1);
1032        self.replication_semaphore = Arc::new(tokio::sync::Semaphore::new(max));
1033        self
1034    }
1035
1036    /// v0.8.5 #86 (audit M-2): borrow the in-flight replication
1037    /// concurrency permit pool. Tests inspect `available_permits()`
1038    /// after invoking `spawn_replication_if_matched` to verify the
1039    /// dispatcher actually `acquire_owned`s before kicking off the
1040    /// destination PUT.
1041    #[must_use]
1042    pub fn replication_semaphore(&self) -> &Arc<tokio::sync::Semaphore> {
1043        &self.replication_semaphore
1044    }
1045
1046    /// v0.4 #21 (kept for back-compat): attach a single SSE-S4 key.
1047    /// Internally wraps it in a 1-slot keyring with id=1 active, so
1048    /// new objects ride the v0.5 S4E2 frame while previously-written
1049    /// S4E1 bytes (this same key) still decrypt via the keyring's S4E1
1050    /// fallback path. Operators wanting true rotation should call
1051    /// [`Self::with_sse_keyring`] instead.
1052    #[must_use]
1053    pub fn with_sse_key(mut self, key: crate::sse::SharedSseKey) -> Self {
1054        let keyring = crate::sse::SseKeyring::new(1, key);
1055        self.sse_keyring = Some(std::sync::Arc::new(keyring));
1056        self
1057    }
1058
1059    /// v0.5 #29: attach a multi-key SSE-S4 keyring. PUT encrypts under
1060    /// the active key (S4E2 frame stamped with that key's id); GET
1061    /// dispatches on the body's magic — S4E1 falls back to trying every
1062    /// key in the ring (active first) so v0.4 objects survive a
1063    /// migration; S4E2 looks up the explicit key_id from the header.
1064    #[must_use]
1065    pub fn with_sse_keyring(mut self, keyring: crate::sse::SharedSseKeyring) -> Self {
1066        self.sse_keyring = Some(keyring);
1067        self
1068    }
1069
1070    /// v0.8 #52: opt the SSE-S4 PUT path into the chunked S4E5 frame
1071    /// (so the matching GET can stream-decrypt chunk-by-chunk
1072    /// instead of buffering the entire body before tag verify).
1073    /// `bytes` is the plaintext slice size — typically 1 MiB; 0
1074    /// disables the path and reverts to the legacy S4E2 buffered
1075    /// frame.
1076    ///
1077    /// SSE-C (S4E3) and SSE-KMS (S4E4) are intentionally untouched:
1078    /// the chunked envelopes for those flows are a follow-up issue
1079    /// (the customer-key wire surface needs separate version
1080    /// negotiation).
1081    ///
1082    /// Has no effect when `with_sse_keyring` / `with_sse_key` is
1083    /// not also set — the chunked path runs only on the SSE-S4
1084    /// branch of `put_object`.
1085    #[must_use]
1086    pub fn with_sse_chunk_size(mut self, bytes: usize) -> Self {
1087        self.sse_chunk_size = bytes;
1088        self
1089    }
1090
1091    /// v0.4 #20: attach an S3-style access-log emitter. Each completed
1092    /// PUT / GET / DELETE / List handler emits one entry into the
1093    /// emitter's buffer; a background flusher (started separately, see
1094    /// [`crate::access_log::AccessLog::spawn_flusher`]) writes hourly
1095    /// rotated `.log` files into the configured directory.
1096    #[must_use]
1097    pub fn with_access_log(mut self, log: crate::access_log::SharedAccessLog) -> Self {
1098        self.access_log = Some(log);
1099        self
1100    }
1101
1102    /// Capture the per-request access-log preamble before the request is
1103    /// consumed by the backend call. Returns `None` if no access logger
1104    /// is configured (cheap early-out so the handler doesn't pay the
1105    /// header-clone cost when access logging is off).
1106    fn access_log_preamble<I>(&self, req: &S3Request<I>) -> Option<AccessLogPreamble> {
1107        self.access_log.as_ref()?;
1108        Some(AccessLogPreamble {
1109            remote_ip: req
1110                .headers
1111                .get("x-forwarded-for")
1112                .and_then(|v| v.to_str().ok())
1113                .and_then(|raw| raw.split(',').next())
1114                .map(|s| s.trim().to_owned()),
1115            requester: Self::principal_of(req).map(str::to_owned),
1116            request_uri: format!("{} {}", req.method, req.uri.path()),
1117            user_agent: req
1118                .headers
1119                .get("user-agent")
1120                .and_then(|v| v.to_str().ok())
1121                .map(str::to_owned),
1122        })
1123    }
1124
1125    /// Internal — called by handlers at end-of-request with a captured
1126    /// preamble. Best-effort: swallows the await fast (clones Arc +
1127    /// pushes), no error propagation back to the request path.
1128    #[allow(clippy::too_many_arguments)]
1129    async fn record_access(
1130        &self,
1131        preamble: Option<AccessLogPreamble>,
1132        operation: &'static str,
1133        bucket: &str,
1134        key: Option<&str>,
1135        http_status: u16,
1136        bytes_sent: u64,
1137        object_size: u64,
1138        total_time_ms: u64,
1139        error_code: Option<&str>,
1140    ) {
1141        let (Some(log), Some(p)) = (self.access_log.as_ref(), preamble) else {
1142            return;
1143        };
1144        log.record(crate::access_log::AccessLogEntry {
1145            time: std::time::SystemTime::now(),
1146            bucket: bucket.to_owned(),
1147            remote_ip: p.remote_ip,
1148            requester: p.requester,
1149            operation,
1150            key: key.map(str::to_owned),
1151            request_uri: p.request_uri,
1152            http_status,
1153            error_code: error_code.map(str::to_owned),
1154            bytes_sent,
1155            object_size,
1156            total_time_ms,
1157            user_agent: p.user_agent,
1158        })
1159        .await;
1160    }
1161
1162    /// v0.4 #19: attach a per-(principal, bucket) token-bucket rate limiter.
1163    /// When set, every PUT / GET / DELETE / List / Copy / multipart op is
1164    /// throttle-checked before the policy gate; throttled requests return
1165    /// `S3ErrorCode::SlowDown` (HTTP 503) and bump
1166    /// `s4_rate_limit_throttled_total{principal,bucket}`.
1167    #[must_use]
1168    pub fn with_rate_limits(mut self, rl: crate::rate_limit::SharedRateLimits) -> Self {
1169        self.rate_limits = Some(rl);
1170        self
1171    }
1172
1173    /// Helper used by request handlers to apply the rate limit. Returns
1174    /// `Ok(())` when allowed (or no rate limiter is configured), or a
1175    /// `SlowDown` S3Error otherwise.
1176    fn enforce_rate_limit<I>(&self, req: &S3Request<I>, bucket: &str) -> S3Result<()> {
1177        let Some(rl) = self.rate_limits.as_ref() else {
1178            return Ok(());
1179        };
1180        let principal_id = Self::principal_of(req);
1181        if !rl.check(principal_id, bucket) {
1182            crate::metrics::record_rate_limit_throttle(principal_id.unwrap_or("-"), bucket);
1183            return Err(S3Error::with_message(
1184                S3ErrorCode::SlowDown,
1185                format!("rate-limited: bucket={bucket}"),
1186            ));
1187        }
1188        Ok(())
1189    }
1190
1191    /// Tell the policy evaluator that the listener is reached over TLS
1192    /// (or ACME). When `true`, the `aws:SecureTransport` Condition key
1193    /// resolves to `true`. Defaults to `false`.
1194    #[must_use]
1195    pub fn with_secure_transport(mut self, on: bool) -> Self {
1196        self.secure_transport = on;
1197        self
1198    }
1199
1200    #[must_use]
1201    pub fn with_max_body_bytes(mut self, n: usize) -> Self {
1202        self.max_body_bytes = n;
1203        self
1204    }
1205
1206    /// Attach an optional bucket policy (v0.2 #7). When `Some(...)`, every
1207    /// PUT / GET / DELETE / List handler runs `policy.evaluate(...)` before
1208    /// delegating to the backend; failures return `S3ErrorCode::AccessDenied`.
1209    /// When `None` (the default), no policy enforcement happens.
1210    #[must_use]
1211    pub fn with_policy(mut self, policy: crate::policy::SharedPolicy) -> Self {
1212        self.policy = Some(policy);
1213        self
1214    }
1215
1216    /// Pull the SigV4 access key id off the request's credentials, if any.
1217    /// Used as the `principal_id` for policy evaluation.
1218    fn principal_of<I>(req: &S3Request<I>) -> Option<&str> {
1219        req.credentials.as_ref().map(|c| c.access_key.as_str())
1220    }
1221
1222    /// v0.3 #13: build the per-request policy context from the incoming
1223    /// `S3Request`. Pulls `aws:UserAgent` from the User-Agent header,
1224    /// `aws:SourceIp` from the standard `X-Forwarded-For` header (most
1225    /// production deployments are behind an LB / reverse proxy that sets
1226    /// this), `aws:CurrentTime` from the system clock, and
1227    /// `aws:SecureTransport` from the per-listener TLS flag.
1228    fn request_context<I>(&self, req: &S3Request<I>) -> crate::policy::RequestContext {
1229        let user_agent = req
1230            .headers
1231            .get("user-agent")
1232            .and_then(|v| v.to_str().ok())
1233            .map(str::to_owned);
1234        // X-Forwarded-For is `client, proxy1, proxy2`; the leftmost entry
1235        // is the original client. Trim and parse leniently.
1236        let source_ip = req
1237            .headers
1238            .get("x-forwarded-for")
1239            .and_then(|v| v.to_str().ok())
1240            .and_then(|raw| raw.split(',').next())
1241            .and_then(|s| s.trim().parse().ok());
1242        crate::policy::RequestContext {
1243            source_ip,
1244            user_agent,
1245            request_time: Some(std::time::SystemTime::now()),
1246            secure_transport: self.secure_transport,
1247            existing_object_tags: None,
1248            request_object_tags: None,
1249            extra: Default::default(),
1250        }
1251    }
1252
1253    /// Helper used by request handlers to enforce the optional policy.
1254    /// Returns `Ok(())` when allowed (or no policy is configured), or an
1255    /// `AccessDenied` S3Error otherwise. Bumps the policy denial Prometheus
1256    /// counter on deny.
1257    fn enforce_policy<I>(
1258        &self,
1259        req: &S3Request<I>,
1260        action: &'static str,
1261        bucket: &str,
1262        key: Option<&str>,
1263    ) -> S3Result<()> {
1264        self.enforce_policy_with_extra(req, action, bucket, key, None, None)
1265    }
1266
1267    /// v0.6 #39: variant of [`Self::enforce_policy`] that lets the
1268    /// caller plumb tag context (existing-on-object + on-request) into
1269    /// the policy evaluator. Both arguments default to `None`, in
1270    /// which case the resulting `RequestContext` is identical to
1271    /// [`Self::enforce_policy`]'s — so for handlers that don't deal
1272    /// with tags this is a transparent no-op.
1273    fn enforce_policy_with_extra<I>(
1274        &self,
1275        req: &S3Request<I>,
1276        action: &'static str,
1277        bucket: &str,
1278        key: Option<&str>,
1279        request_tags: Option<&crate::tagging::TagSet>,
1280        existing_tags: Option<&crate::tagging::TagSet>,
1281    ) -> S3Result<()> {
1282        let Some(policy) = self.policy.as_ref() else {
1283            return Ok(());
1284        };
1285        let principal_id = Self::principal_of(req);
1286        let mut ctx = self.request_context(req);
1287        if let Some(t) = request_tags {
1288            ctx.request_object_tags = Some(t.clone());
1289        }
1290        if let Some(t) = existing_tags {
1291            ctx.existing_object_tags = Some(t.clone());
1292        }
1293        let decision = policy.evaluate_with(action, bucket, key, principal_id, &ctx);
1294        if decision.allow {
1295            Ok(())
1296        } else {
1297            crate::metrics::record_policy_denial(action, bucket);
1298            tracing::info!(
1299                action,
1300                bucket,
1301                key = ?key,
1302                principal = ?principal_id,
1303                source_ip = ?ctx.source_ip,
1304                user_agent = ?ctx.user_agent,
1305                secure_transport = ctx.secure_transport,
1306                matched_sid = ?decision.matched_sid,
1307                effect = ?decision.matched_effect,
1308                "S4 policy denied request"
1309            );
1310            Err(S3Error::with_message(
1311                S3ErrorCode::AccessDenied,
1312                format!("denied by S4 policy: {action} on bucket={bucket}"),
1313            ))
1314        }
1315    }
1316
1317    /// テスト用: backend を取り戻す (test helper、production では使わない).
1318    /// v0.6 #40 で `backend` が `Arc<B>` 化したので `Arc::try_unwrap` で
1319    /// 1-clone の場合のみ返す。共有されている (= replication dispatcher が
1320    /// 同じ Arc を持っていて未完了) 場合は `Err` を返さず panic させる
1321    /// (test 用途専用 helper の caller 契約を維持)。
1322    pub fn into_backend(self) -> B {
1323        Arc::try_unwrap(self.backend).unwrap_or_else(|_| {
1324            panic!("into_backend: backend Arc still shared (replication dispatcher in flight?)")
1325        })
1326    }
1327
    /// Sidecar fast path for Range GETs: Range-GET only the frames covering
    /// the requested span from the backend, then frame-parse, decompress and
    /// slice the result — the **bandwidth-saving** variant of a Range request.
    ///
    /// `plan` carries the backend byte window and the slice offsets within
    /// the decompressed concatenation; `client_start` / `client_end_exclusive`
    /// / `total_original` are in decompressed-object coordinates and drive the
    /// `Content-Range` header. `get_start` is used for latency metrics.
    async fn partial_range_get(
        &self,
        req: &S3Request<GetObjectInput>,
        plan: s4_codec::index::RangePlan,
        client_start: u64,
        client_end_exclusive: u64,
        total_original: u64,
        get_start: Instant,
    ) -> S3Result<S3Response<GetObjectOutput>> {
        // Partial GET against the backend for just the needed byte window.
        let backend_range = s3s::dto::Range::Int {
            first: plan.byte_start,
            last: Some(plan.byte_end_exclusive - 1),
        };
        let backend_input = GetObjectInput {
            bucket: req.input.bucket.clone(),
            key: req.input.key.clone(),
            range: Some(backend_range),
            ..Default::default()
        };
        let backend_req = S3Request {
            input: backend_input,
            method: req.method.clone(),
            uri: req.uri.clone(),
            headers: req.headers.clone(),
            extensions: http::Extensions::new(),
            credentials: req.credentials.clone(),
            region: req.region.clone(),
            service: req.service.clone(),
            trailing_headers: None,
        };
        let mut backend_resp = self.backend.get_object(backend_req).await?;
        let blob = backend_resp.output.body.take().ok_or_else(|| {
            S3Error::with_message(
                S3ErrorCode::InternalError,
                "backend partial GET returned empty body",
            )
        })?;
        let bytes = collect_blob(blob, self.max_body_bytes)
            .await
            .map_err(internal("collect partial body"))?;

        // Frame parse + per-frame decompress; decompressed pieces are
        // appended in frame order to rebuild a contiguous plaintext span.
        let mut combined = BytesMut::new();
        for frame in FrameIter::new(bytes) {
            let (header, payload) = frame.map_err(|e| {
                S3Error::with_message(
                    S3ErrorCode::InternalError,
                    format!("partial-range frame parse: {e}"),
                )
            })?;
            let chunk_manifest = ChunkManifest {
                codec: header.codec,
                original_size: header.original_size,
                compressed_size: header.compressed_size,
                crc32c: header.crc32c,
            };
            let decompressed = self
                .registry
                .decompress(payload, &chunk_manifest)
                .await
                .map_err(internal("partial-range decompress"))?;
            combined.extend_from_slice(&decompressed);
        }
        let combined = combined.freeze();
        let sliced = combined
            .slice(plan.slice_start_in_combined as usize..plan.slice_end_in_combined as usize);

        // Response assembly: rewrite length / Content-Range to the client's
        // requested (decompressed) coordinates.
        let returned_size = sliced.len() as u64;
        backend_resp.output.content_length = Some(returned_size as i64);
        backend_resp.output.content_range = Some(format!(
            "bytes {client_start}-{}/{total_original}",
            client_end_exclusive - 1
        ));
        // The backend's checksums / ETag no longer describe the body we are
        // returning (we replaced it with a decompressed slice), so strip them
        // to avoid failing client-side integrity validation.
        backend_resp.output.checksum_crc32 = None;
        backend_resp.output.checksum_crc32c = None;
        backend_resp.output.checksum_crc64nvme = None;
        backend_resp.output.checksum_sha1 = None;
        backend_resp.output.checksum_sha256 = None;
        backend_resp.output.e_tag = None;
        backend_resp.output.body = Some(bytes_to_blob(sliced));
        backend_resp.status = Some(http::StatusCode::PARTIAL_CONTENT);

        let elapsed = get_start.elapsed();
        crate::metrics::record_get(
            "partial",
            plan.byte_end_exclusive - plan.byte_start,
            returned_size,
            elapsed.as_secs_f64(),
            true,
        );
        info!(
            op = "get_object",
            bucket = %req.input.bucket,
            key = %req.input.key,
            bytes_in = plan.byte_end_exclusive - plan.byte_start,
            bytes_out = returned_size,
            total_object_size = total_original,
            range = true,
            path = "sidecar-partial",
            latency_ms = elapsed.as_millis() as u64,
            "S4 partial Range GET via sidecar index"
        );
        Ok(backend_resp)
    }
1436
1437    /// `<key>.s4index` sidecar object を backend に書く。失敗しても本体 PUT は
1438    /// 成功扱いにしたいので、err は warn ログのみ (Range GET の partial path が
1439    /// 使えなくなるが、full read fallback で意味的には正しい結果を返す)。
1440    async fn write_sidecar(&self, bucket: &str, key: &str, index: &FrameIndex) {
1441        let bytes = encode_index(index);
1442        let len = bytes.len() as i64;
1443        let sidecar = sidecar_key(key);
1444        // v0.7 #49: synthetic re-entry URI must be percent-encoded; if
1445        // the (already legally-arbitrary) S3 key produces something we
1446        // cannot encode at all, drop the sidecar PUT (the GET path
1447        // falls back to a full read on a missing sidecar) instead of
1448        // panicking on `parse().unwrap()`.
1449        let uri = match safe_object_uri(bucket, &sidecar) {
1450            Ok(u) => u,
1451            Err(e) => {
1452                tracing::warn!(
1453                    bucket,
1454                    key,
1455                    "S4 write_sidecar skipped (key not URI-encodable): {e}"
1456                );
1457                return;
1458            }
1459        };
1460        let put_input = PutObjectInput {
1461            bucket: bucket.into(),
1462            key: sidecar,
1463            body: Some(bytes_to_blob(bytes)),
1464            content_length: Some(len),
1465            content_type: Some("application/x-s4-index".into()),
1466            ..Default::default()
1467        };
1468        let put_req = S3Request {
1469            input: put_input,
1470            method: http::Method::PUT,
1471            uri,
1472            headers: http::HeaderMap::new(),
1473            extensions: http::Extensions::new(),
1474            credentials: None,
1475            region: None,
1476            service: None,
1477            trailing_headers: None,
1478        };
1479        if let Err(e) = self.backend.put_object(put_req).await {
1480            tracing::warn!(
1481                bucket,
1482                key,
1483                "S4 write_sidecar failed (Range GET will fall back to full read): {e}"
1484            );
1485        }
1486    }
1487
1488    /// v0.8.4 #73 H-2: confirm that the sidecar we just decoded still
1489    /// describes the current backend object before we trust its frame
1490    /// offsets for a partial Range GET. The sidecar carries the source
1491    /// `etag` and `compressed_size` that were observed at PUT time; we
1492    /// HEAD the backend object and compare.
1493    ///
1494    /// Decision matrix:
1495    /// - sidecar `source_etag = None` (legacy v1 / build_index_from_body
1496    ///   that wasn't stamped) → return `true` (best-effort, preserves
1497    ///   pre-v0.8.4 behaviour for existing on-disk sidecars).
1498    /// - HEAD fails → return `false` (we can't tell either way; full GET
1499    ///   path will surface the real backend error to the client).
1500    /// - HEAD ETag matches → `true`.
1501    /// - HEAD ETag differs OR HEAD size differs from
1502    ///   `source_compressed_size` → `false` (sidecar stale or attacker-
1503    ///   written; fall back to full GET).
1504    async fn sidecar_version_binding_ok(
1505        &self,
1506        bucket: &str,
1507        key: &str,
1508        index: &FrameIndex,
1509    ) -> bool {
1510        let Some(ref expected_etag) = index.source_etag else {
1511            // Legacy sidecar without the v0.8.4 #73 H-2 binding —
1512            // back-compat: trust it (the partial fetch is the same
1513            // best-effort path that v0.8.3 and earlier shipped).
1514            return true;
1515        };
1516        let head_input = HeadObjectInput {
1517            bucket: bucket.into(),
1518            key: key.into(),
1519            ..Default::default()
1520        };
1521        let uri = match safe_object_uri(bucket, key) {
1522            Ok(u) => u,
1523            Err(_) => return false,
1524        };
1525        let head_req = S3Request {
1526            input: head_input,
1527            method: http::Method::HEAD,
1528            uri,
1529            headers: http::HeaderMap::new(),
1530            extensions: http::Extensions::new(),
1531            credentials: None,
1532            region: None,
1533            service: None,
1534            trailing_headers: None,
1535        };
1536        let head = match self.backend.head_object(head_req).await {
1537            Ok(r) => r.output,
1538            Err(e) => {
1539                tracing::debug!(
1540                    bucket,
1541                    key,
1542                    "S4 sidecar version-binding HEAD failed, falling back to full GET: {e}"
1543                );
1544                return false;
1545            }
1546        };
1547        // ETag is a strong-vs-weak enum; we compare on the unwrapped string
1548        // form (matches what the PUT path stamped — see below).
1549        let live_etag = head.e_tag.as_ref().map(|t| t.value());
1550        if live_etag != Some(expected_etag.as_str()) {
1551            tracing::debug!(
1552                bucket,
1553                key,
1554                "sidecar stale (ETag mismatch), falling back to full GET (sidecar={:?}, live={:?})",
1555                expected_etag,
1556                live_etag,
1557            );
1558            return false;
1559        }
1560        if let Some(expected_size) = index.source_compressed_size
1561            && let Some(live_size) = head.content_length
1562            && live_size as u64 != expected_size
1563        {
1564            tracing::debug!(
1565                bucket,
1566                key,
1567                "sidecar stale (size mismatch), falling back to full GET (sidecar={}, live={})",
1568                expected_size,
1569                live_size,
1570            );
1571            return false;
1572        }
1573        true
1574    }
1575
1576    /// `<key>.s4index` sidecar を backend から読み出す。なければ None。
1577    async fn read_sidecar(&self, bucket: &str, key: &str) -> Option<FrameIndex> {
1578        let sidecar = sidecar_key(key);
1579        // v0.7 #49: same encode-or-bail treatment as write_sidecar.
1580        let uri = safe_object_uri(bucket, &sidecar).ok()?;
1581        let get_input = GetObjectInput {
1582            bucket: bucket.into(),
1583            key: sidecar,
1584            ..Default::default()
1585        };
1586        let get_req = S3Request {
1587            input: get_input,
1588            method: http::Method::GET,
1589            uri,
1590            headers: http::HeaderMap::new(),
1591            extensions: http::Extensions::new(),
1592            credentials: None,
1593            region: None,
1594            service: None,
1595            trailing_headers: None,
1596        };
1597        let resp = self.backend.get_object(get_req).await.ok()?;
1598        let blob = resp.output.body?;
1599        let bytes = collect_blob(blob, 64 * 1024 * 1024).await.ok()?;
1600        decode_index(bytes).ok()
1601    }
1602
1603    /// Multipart object (frame 列) を解凍 → 元 bytes を再構築。
1604    ///
1605    /// **per-frame codec dispatch**: 各 frame header に codec_id が入っているので、
1606    /// frame ごとに registry が違う codec を呼ぶことができる。同一 object 内で
1607    /// 異なる codec が混在していても透過的に解凍可能 (parquet 風 mixed columns 等)。
1608    async fn decompress_multipart(&self, bytes: bytes::Bytes) -> S3Result<bytes::Bytes> {
1609        let mut out = BytesMut::new();
1610        for frame in FrameIter::new(bytes) {
1611            let (header, payload) = frame.map_err(|e| {
1612                S3Error::with_message(
1613                    S3ErrorCode::InternalError,
1614                    format!("multipart frame parse: {e}"),
1615                )
1616            })?;
1617            let chunk_manifest = ChunkManifest {
1618                codec: header.codec,
1619                original_size: header.original_size,
1620                compressed_size: header.compressed_size,
1621                crc32c: header.crc32c,
1622            };
1623            let decompressed = self
1624                .registry
1625                .decompress(payload, &chunk_manifest)
1626                .await
1627                .map_err(internal("multipart frame decompress"))?;
1628            out.extend_from_slice(&decompressed);
1629        }
1630        Ok(out.freeze())
1631    }
1632}
1633
1634/// Parse a CopySourceRange header value (`bytes=N-M`, `bytes=N-`, `bytes=-N`)
1635/// into the s3s::dto::Range used by the GetObject path. The S3 spec only
1636/// allows `bytes=N-M` for upload_part_copy (no suffix or open-ended), so
1637/// reject the other variants for parity with AWS.
1638fn parse_copy_source_range(s: &str) -> Result<s3s::dto::Range, String> {
1639    let rest = s
1640        .strip_prefix("bytes=")
1641        .ok_or_else(|| format!("CopySourceRange must start with 'bytes=', got {s:?}"))?;
1642    let (a, b) = rest
1643        .split_once('-')
1644        .ok_or_else(|| format!("CopySourceRange must be 'bytes=N-M', got {s:?}"))?;
1645    let first: u64 = a
1646        .parse()
1647        .map_err(|_| format!("CopySourceRange first byte not a number: {a:?}"))?;
1648    let last: u64 = b
1649        .parse()
1650        .map_err(|_| format!("CopySourceRange last byte not a number: {b:?}"))?;
1651    if last < first {
1652        return Err(format!("CopySourceRange last < first: {s:?}"));
1653    }
1654    Ok(s3s::dto::Range::Int {
1655        first,
1656        last: Some(last),
1657    })
1658}
1659
/// v0.5 #34: synthesize the backend storage key for a given
/// (logical key, version-id) pair on an Enabled-versioning bucket.
///
/// Uses the `__s4ver__/` infix because:
/// - it's not a substring of `.s4index` / `.s4ver` natural keys (no false-positive
///   listing filter collisions)
/// - directory-style separator keeps S3 console "browse by prefix" UX intact
///   (versions roll up under one virtual folder per object)
/// - human-readable on debug logs / `aws s3 ls`
///
/// `list_objects` / `list_objects_v2` / `list_object_versions` MUST filter
/// keys containing `.__s4ver__/` from results so customers don't see internal
/// shadow objects.
pub fn versioned_shadow_key(key: &str, version_id: &str) -> String {
    const MARKER: &str = ".__s4ver__/";
    let mut shadow = String::with_capacity(key.len() + MARKER.len() + version_id.len());
    shadow.push_str(key);
    shadow.push_str(MARKER);
    shadow.push_str(version_id);
    shadow
}
1676
/// Test for the marker substring used by [`versioned_shadow_key`]. Cheap str
/// scan; both the list_objects filter and the GET passthrough check use this.
fn is_versioning_shadow_key(key: &str) -> bool {
    key.find(".__s4ver__/").is_some()
}
1682
/// v0.6 #42: wall-clock seconds since the UNIX epoch — fed to
/// `mfa::check_mfa` so the TOTP verifier can match the client's
/// authenticator app's view of "now".
fn current_unix_secs() -> u64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        // Clock-before-1970 is impossible in practice; returning 0 makes the
        // verifier reject rather than panic.
        Err(_) => 0,
    }
}
1694
1695/// v0.6 #42: translate an `MfaError` into the matching S3 wire error.
1696///
1697/// - `Missing` / `SerialMismatch` / `InvalidCode` → `403 AccessDenied`
1698///   (S3 spec for MFA Delete: every gating failure surfaces as
1699///   `AccessDenied`, not a separate `MFA*` code).
1700/// - `Malformed` → `400 InvalidRequest` (the request itself is
1701///   syntactically broken, not a permission issue).
1702fn mfa_error_to_s3(e: crate::mfa::MfaError) -> S3Error {
1703    match e {
1704        crate::mfa::MfaError::Missing => S3Error::with_message(
1705            S3ErrorCode::AccessDenied,
1706            "MFA token required for this operation",
1707        ),
1708        crate::mfa::MfaError::Malformed => {
1709            S3Error::with_message(S3ErrorCode::InvalidRequest, "malformed x-amz-mfa header")
1710        }
1711        crate::mfa::MfaError::SerialMismatch => S3Error::with_message(
1712            S3ErrorCode::AccessDenied,
1713            "MFA serial does not match configured device",
1714        ),
1715        crate::mfa::MfaError::InvalidCode => {
1716            S3Error::with_message(S3ErrorCode::AccessDenied, "invalid MFA code")
1717        }
1718    }
1719}
1720
1721fn is_multipart_object(metadata: &Option<Metadata>) -> bool {
1722    metadata
1723        .as_ref()
1724        .and_then(|m| m.get(META_MULTIPART))
1725        .map(|v| v == "true")
1726        .unwrap_or(false)
1727}
1728
/// Metadata key: codec id written by `write_manifest` (`ChunkManifest::codec`).
const META_CODEC: &str = "s4-codec";
/// Metadata key: `ChunkManifest::original_size` (pre-compression byte count).
const META_ORIGINAL_SIZE: &str = "s4-original-size";
/// Metadata key: `ChunkManifest::compressed_size` (post-compression byte count).
const META_COMPRESSED_SIZE: &str = "s4-compressed-size";
/// Metadata key: `ChunkManifest::crc32c` checksum field.
const META_CRC32C: &str = "s4-crc32c";
/// Marks an object written via multipart upload using the per-part frame
/// format. GET inspects this flag to route through the frame parser.
const META_MULTIPART: &str = "s4-multipart";
/// v0.2 #4: marks a single-PUT object written in the S4F2 framed format.
/// Legacy v0.1 single-PUTs are raw compressed bytes (flag absent); GET uses
/// this flag to route through the framed path (the same FrameIter parse as
/// multipart objects).
const META_FRAMED: &str = "s4-framed";
1740
1741fn is_framed_v2_object(metadata: &Option<Metadata>) -> bool {
1742    metadata
1743        .as_ref()
1744        .and_then(|m| m.get(META_FRAMED))
1745        .map(|v| v == "true")
1746        .unwrap_or(false)
1747}
1748
1749/// v0.4 #21: detect SSE-S4 by the metadata flag we set on PUT.
1750fn is_sse_encrypted(metadata: &Option<Metadata>) -> bool {
1751    metadata
1752        .as_ref()
1753        .and_then(|m| m.get("s4-encrypted"))
1754        .map(|v| v == "aes-256-gcm")
1755        .unwrap_or(false)
1756}
1757
1758/// v0.5 #27: pull the three SSE-C headers off an input struct. The S3
1759/// contract is "all three or none" — partial sets are a 400.
1760///
1761/// Returns `Ok(None)` when no SSE-C headers were sent (server-managed or
1762/// no encryption), `Ok(Some(material))` on validated client key, and
1763/// `Err` for malformed or partial inputs.
1764fn extract_sse_c_material(
1765    algorithm: &Option<String>,
1766    key: &Option<String>,
1767    md5: &Option<String>,
1768) -> S3Result<Option<crate::sse::CustomerKeyMaterial>> {
1769    match (algorithm, key, md5) {
1770        (None, None, None) => Ok(None),
1771        (Some(a), Some(k), Some(m)) => crate::sse::parse_customer_key_headers(a, k, m)
1772            .map(Some)
1773            .map_err(sse_c_error_to_s3),
1774        _ => Err(S3Error::with_message(
1775            S3ErrorCode::InvalidRequest,
1776            "SSE-C requires all three of: x-amz-server-side-encryption-customer-{algorithm,key,key-MD5}",
1777        )),
1778    }
1779}
1780
1781/// v0.5 #28: detect SSE-KMS request — `x-amz-server-side-encryption: aws:kms`.
1782/// Returns the key-id to wrap under, falling back to the gateway default.
1783fn extract_kms_key_id(
1784    sse: &Option<ServerSideEncryption>,
1785    sse_kms_key_id: &Option<String>,
1786    gateway_default: Option<&str>,
1787) -> Option<String> {
1788    let asks_for_kms = sse
1789        .as_ref()
1790        .map(|s| s.as_str() == ServerSideEncryption::AWS_KMS)
1791        .unwrap_or(false);
1792    if !asks_for_kms {
1793        return None;
1794    }
1795    sse_kms_key_id
1796        .clone()
1797        .or_else(|| gateway_default.map(str::to_owned))
1798}
1799
1800/// v0.5 #28: map kms module errors to AWS-shaped S3 error codes.
1801/// `KeyNotFound` is operator misconfig (400); `BackendUnavailable` is a
1802/// transient KMS outage (503). Other variants are 500 InternalError.
1803fn kms_error_to_s3(e: crate::kms::KmsError) -> S3Error {
1804    use crate::kms::KmsError as K;
1805    match e {
1806        K::KeyNotFound { key_id } => S3Error::with_message(
1807            S3ErrorCode::InvalidArgument,
1808            format!("KMS key not found: {key_id}"),
1809        ),
1810        K::BackendUnavailable { message } => S3Error::with_message(
1811            S3ErrorCode::ServiceUnavailable,
1812            format!("KMS backend unavailable: {message}"),
1813        ),
1814        other => S3Error::with_message(S3ErrorCode::InternalError, format!("KMS error: {other}")),
1815    }
1816}
1817
1818/// v0.5 #27: map sse module errors to AWS-shaped S3 error codes.
1819/// `WrongCustomerKey` → 403 AccessDenied (matches AWS behaviour);
1820/// `InvalidCustomerKey` / algorithm / required / unexpected → 400.
1821fn sse_c_error_to_s3(e: crate::sse::SseError) -> S3Error {
1822    use crate::sse::SseError as E;
1823    match e {
1824        E::WrongCustomerKey => S3Error::with_message(
1825            S3ErrorCode::AccessDenied,
1826            "SSE-C key does not match the key used at PUT time",
1827        ),
1828        E::InvalidCustomerKey { reason } => {
1829            S3Error::with_message(S3ErrorCode::InvalidArgument, format!("SSE-C: {reason}"))
1830        }
1831        E::CustomerKeyAlgorithmUnsupported { algo } => S3Error::with_message(
1832            S3ErrorCode::InvalidArgument,
1833            format!("SSE-C unsupported algorithm: {algo:?} (only AES256 is allowed)"),
1834        ),
1835        E::CustomerKeyRequired => S3Error::with_message(
1836            S3ErrorCode::InvalidRequest,
1837            "object is SSE-C encrypted; supply x-amz-server-side-encryption-customer-* headers",
1838        ),
1839        E::CustomerKeyUnexpected => S3Error::with_message(
1840            S3ErrorCode::InvalidRequest,
1841            "object is not SSE-C encrypted; do not send x-amz-server-side-encryption-customer-* headers",
1842        ),
1843        other => S3Error::with_message(S3ErrorCode::InternalError, format!("SSE error: {other}")),
1844    }
1845}
1846
1847fn extract_manifest(metadata: &Option<Metadata>) -> Option<ChunkManifest> {
1848    let m = metadata.as_ref()?;
1849    let codec = m
1850        .get(META_CODEC)
1851        .and_then(|s| s.parse::<CodecKind>().ok())?;
1852    let original_size = m.get(META_ORIGINAL_SIZE)?.parse().ok()?;
1853    let compressed_size = m.get(META_COMPRESSED_SIZE)?.parse().ok()?;
1854    let crc32c = m.get(META_CRC32C)?.parse().ok()?;
1855    Some(ChunkManifest {
1856        codec,
1857        original_size,
1858        compressed_size,
1859        crc32c,
1860    })
1861}
1862
1863fn write_manifest(metadata: &mut Option<Metadata>, manifest: &ChunkManifest) {
1864    let meta = metadata.get_or_insert_with(Default::default);
1865    meta.insert(META_CODEC.into(), manifest.codec.as_str().into());
1866    meta.insert(
1867        META_ORIGINAL_SIZE.into(),
1868        manifest.original_size.to_string(),
1869    );
1870    meta.insert(
1871        META_COMPRESSED_SIZE.into(),
1872        manifest.compressed_size.to_string(),
1873    );
1874    meta.insert(META_CRC32C.into(), manifest.crc32c.to_string());
1875}
1876
1877fn internal<E: std::fmt::Display>(prefix: &'static str) -> impl FnOnce(E) -> S3Error {
1878    move |e| S3Error::with_message(S3ErrorCode::InternalError, format!("{prefix}: {e}"))
1879}
1880
1881/// v0.6 #41: map a `select::SelectError` to the S3 error surface. AWS
1882/// uses a domain-specific `InvalidSqlExpression` code for parse / unsupported
1883/// errors, but s3s 0.13 doesn't expose that as a typed variant — we
1884/// fall back to the well-known `InvalidRequest` 400 with a descriptive
1885/// message that includes the original error context.
1886fn select_error_to_s3(e: crate::select::SelectError, fmt: &str) -> S3Error {
1887    use crate::select::SelectError;
1888    match e {
1889        SelectError::Parse(msg) => S3Error::with_message(
1890            S3ErrorCode::InvalidRequest,
1891            format!("SQL parse error: {msg}"),
1892        ),
1893        SelectError::UnsupportedFeature(msg) => S3Error::with_message(
1894            S3ErrorCode::InvalidRequest,
1895            format!("unsupported SQL feature: {msg}"),
1896        ),
1897        SelectError::RowEval(msg) => S3Error::with_message(
1898            S3ErrorCode::InvalidRequest,
1899            format!("SQL row evaluation error: {msg}"),
1900        ),
1901        SelectError::InputFormat(msg) => S3Error::with_message(
1902            S3ErrorCode::InvalidRequest,
1903            format!("{fmt} input format error: {msg}"),
1904        ),
1905    }
1906}
1907
1908/// v0.5 #30: parse the `x-amz-bypass-governance-retention` header into a
1909/// boolean flag. AWS S3 accepts `true` (case-insensitive); any other value
1910/// (including missing) is treated as `false`.
1911fn parse_bypass_governance_header(headers: &http::HeaderMap) -> bool {
1912    headers
1913        .get("x-amz-bypass-governance-retention")
1914        .and_then(|v| v.to_str().ok())
1915        .map(|s| s.eq_ignore_ascii_case("true"))
1916        .unwrap_or(false)
1917}
1918
1919/// Convert s3s `Timestamp` into a `chrono::DateTime<Utc>` by formatting it
1920/// as an RFC3339 string and re-parsing through `chrono`. The string format
1921/// avoids pulling the `time` crate (transitive dep of s3s, not declared by
1922/// s4-server) into our direct deps. Returns `None` if the format/parse fails
1923/// or the value is outside `chrono`'s supported range.
1924fn timestamp_to_chrono_utc(ts: &Timestamp) -> Option<chrono::DateTime<chrono::Utc>> {
1925    let mut buf = Vec::new();
1926    ts.format(s3s::dto::TimestampFormat::DateTime, &mut buf)
1927        .ok()?;
1928    let s = std::str::from_utf8(&buf).ok()?;
1929    chrono::DateTime::parse_from_rfc3339(s)
1930        .ok()
1931        .map(|dt| dt.with_timezone(&chrono::Utc))
1932}
1933
1934/// Inverse of [`timestamp_to_chrono_utc`] — emit RFC3339 (the s3s
1935/// `DateTime` wire format) and re-parse via `Timestamp::parse`.
1936fn chrono_utc_to_timestamp(dt: chrono::DateTime<chrono::Utc>) -> Timestamp {
1937    // chrono's RFC3339 output format matches s3s' parser ("...Z" with
1938    // optional sub-second precision). Fall back to UNIX_EPOCH if anything
1939    // unexpected happens — we never produce malformed strings, so this
1940    // branch is unreachable in practice.
1941    let s = dt.to_rfc3339_opts(chrono::SecondsFormat::Millis, true);
1942    Timestamp::parse(s3s::dto::TimestampFormat::DateTime, &s).unwrap_or_default()
1943}
1944
1945/// v0.6 #39: convert our internal [`crate::tagging::TagSet`] into the
1946/// s3s `Vec<Tag>` wire shape used on `GetObject/BucketTaggingOutput`.
1947/// Both halves of every pair land in the `Some(_)` slot — AWS marks
1948/// the field optional but always populates it on response.
1949fn tagset_to_aws(set: &crate::tagging::TagSet) -> Vec<Tag> {
1950    set.iter()
1951        .map(|(k, v)| Tag {
1952            key: Some(k.clone()),
1953            value: Some(v.clone()),
1954        })
1955        .collect()
1956}
1957
1958/// v0.6 #39: inverse of [`tagset_to_aws`] for input handlers. Missing
1959/// keys / values become empty strings (mirrors AWS, which rejects
1960/// `<Key/>` with InvalidTag at the parser layer; downstream
1961/// `TagSet::validate` then enforces our size limits).
1962fn aws_to_tagset(tags: &[Tag]) -> Result<crate::tagging::TagSet, crate::tagging::TagError> {
1963    let pairs = tags
1964        .iter()
1965        .map(|t| {
1966            (
1967                t.key.clone().unwrap_or_default(),
1968                t.value.clone().unwrap_or_default(),
1969            )
1970        })
1971        .collect();
1972    crate::tagging::TagSet::from_pairs(pairs)
1973}
1974
1975/// `Range` request を decompressed object サイズ `total` に適用して `(start, end_exclusive)`
1976/// を返す。`Range::Int { first, last }` は `bytes=first-last` (last は inclusive)、
1977/// `Range::Suffix { length }` は末尾 `length` byte。S3 仕様に準拠。
1978pub fn resolve_range(range: &s3s::dto::Range, total: u64) -> Result<(u64, u64), String> {
1979    if total == 0 {
1980        return Err("cannot range-get zero-length object".into());
1981    }
1982    match range {
1983        s3s::dto::Range::Int { first, last } => {
1984            let start = *first;
1985            let end_inclusive = match last {
1986                Some(l) => (*l).min(total - 1),
1987                None => total - 1,
1988            };
1989            if start > end_inclusive || start >= total {
1990                return Err(format!(
1991                    "range bytes={start}-{:?} out of object size {total}",
1992                    last
1993                ));
1994            }
1995            Ok((start, end_inclusive + 1))
1996        }
1997        s3s::dto::Range::Suffix { length } => {
1998            let len = (*length).min(total);
1999            Ok((total - len, total))
2000        }
2001    }
2002}
2003
2004#[async_trait::async_trait]
2005impl<B: S3> S3 for S4Service<B> {
2006    // === 圧縮を挟む path (PUT) ===
2007    #[tracing::instrument(
2008        name = "s4.put_object",
2009        skip(self, req),
2010        fields(bucket = %req.input.bucket, key = %req.input.key, codec, bytes_in, bytes_out, latency_ms)
2011    )]
2012    async fn put_object(
2013        &self,
2014        mut req: S3Request<PutObjectInput>,
2015    ) -> S3Result<S3Response<PutObjectOutput>> {
2016        let put_start = Instant::now();
2017        let put_bucket = req.input.bucket.clone();
2018        let put_key = req.input.key.clone();
2019        let access_preamble = self.access_log_preamble(&req);
2020        self.enforce_rate_limit(&req, &put_bucket)?;
2021        // v0.6 #39: parse `x-amz-tagging` (URL-encoded query string) so
2022        // the IAM policy gate sees the request's tags via
2023        // `s3:RequestObjectTag/<key>`. `existing_object_tags` is also
2024        // resolved from the Tagging manager (when wired) so
2025        // `s3:ExistingObjectTag/<key>` works on overwrite.
2026        let request_tags: Option<crate::tagging::TagSet> = req
2027            .input
2028            .tagging
2029            .as_deref()
2030            .map(crate::tagging::parse_tagging_header)
2031            .transpose()
2032            .map_err(|e| S3Error::with_message(S3ErrorCode::InvalidArgument, e.to_string()))?;
2033        let existing_tags: Option<crate::tagging::TagSet> = self
2034            .tagging
2035            .as_ref()
2036            .and_then(|m| m.get_object_tags(&put_bucket, &put_key));
2037        self.enforce_policy_with_extra(
2038            &req,
2039            "s3:PutObject",
2040            &put_bucket,
2041            Some(&put_key),
2042            request_tags.as_ref(),
2043            existing_tags.as_ref(),
2044        )?;
2045        // v0.5 #30: an Object Lock-protected key cannot be overwritten by
2046        // a non-versioned PUT (Suspended / Unversioned bucket). Enabled
2047        // bucket PUTs are exempt because they materialise a fresh
2048        // version under a shadow key (`<key>.__s4ver__/<vid>`) — the
2049        // locked version's bytes are untouched. The check mirrors the
2050        // delete path (Compliance never bypassable, Governance via the
2051        // bypass header, legal hold never).
2052        if let Some(mgr) = self.object_lock.as_ref()
2053            && let Some(state) = mgr.get(&put_bucket, &put_key)
2054        {
2055            let bucket_versioned_enabled = self
2056                .versioning
2057                .as_ref()
2058                .map(|v| v.state(&put_bucket) == crate::versioning::VersioningState::Enabled)
2059                .unwrap_or(false);
2060            if !bucket_versioned_enabled {
2061                let bypass = parse_bypass_governance_header(&req.headers);
2062                let now = chrono::Utc::now();
2063                if !state.can_delete(now, bypass) {
2064                    crate::metrics::record_policy_denial("s3:PutObject", &put_bucket);
2065                    return Err(S3Error::with_message(
2066                        S3ErrorCode::AccessDenied,
2067                        "Access Denied because object protected by object lock",
2068                    ));
2069                }
2070            }
2071        }
2072        // v0.5 #30: per-PUT explicit retention / legal hold (S3
2073        // `x-amz-object-lock-mode`, `x-amz-object-lock-retain-until-date`,
2074        // `x-amz-object-lock-legal-hold`). Captured before the body
2075        // moves into the backend; persisted into the manager only on
2076        // backend success below.
2077        let explicit_lock_mode: Option<crate::object_lock::LockMode> = req
2078            .input
2079            .object_lock_mode
2080            .as_ref()
2081            .and_then(|m| crate::object_lock::LockMode::from_aws_str(m.as_str()));
2082        let explicit_retain_until: Option<chrono::DateTime<chrono::Utc>> = req
2083            .input
2084            .object_lock_retain_until_date
2085            .as_ref()
2086            .and_then(timestamp_to_chrono_utc);
2087        let explicit_legal_hold_on: Option<bool> = req
2088            .input
2089            .object_lock_legal_hold_status
2090            .as_ref()
2091            .map(|s| s.as_str().eq_ignore_ascii_case("ON"));
2092        if let Some(blob) = req.input.body.take() {
2093            // Sample 4 KiB から codec を決定。streaming-aware codec なら streaming
2094            // compress fast path、そうでなければ従来の collect-then-compress。
2095            let (sample, rest_stream) = peek_sample(blob, SAMPLE_BYTES)
2096                .await
2097                .map_err(internal("peek put sample"))?;
2098            let sample_len = sample.len().min(SAMPLE_BYTES);
2099            // v0.8 #56: pass the request's Content-Length (when present) so
2100            // the sampling dispatcher can promote large objects to a GPU
2101            // codec. Chunked transfers (no Content-Length) keep CPU.
2102            let total_size_hint = req.input.content_length.and_then(|n| u64::try_from(n).ok());
2103            let kind = self
2104                .dispatcher
2105                .pick_with_size_hint(&sample[..sample_len], total_size_hint)
2106                .await;
2107
2108            // Passthrough buys nothing from S4F2 wrapping (no compression =
2109            // no per-chunk frame to skip past) and the +28-byte header
2110            // overhead breaks size-sensitive callers that expect a true
2111            // pass-through. So passthrough always uses the legacy raw-blob
2112            // path; only compressing codecs go through the framed path.
2113            let use_framed = supports_streaming_compress(kind) && kind != CodecKind::Passthrough;
2114            let (compressed, manifest, is_framed) = if use_framed {
2115                // streaming fast path: input は memory に collect しない
2116                let chained = chain_sample_with_rest(sample, rest_stream);
2117                debug!(
2118                    bucket = ?req.input.bucket,
2119                    key = ?req.input.key,
2120                    codec = kind.as_str(),
2121                    path = "streaming-framed",
2122                    "S4 put_object: compressing (streaming, S4F2 multi-frame)"
2123                );
2124                // v0.4 #16: pick the chunk size based on the request's
2125                // Content-Length when known, falling back to the 4 MiB
2126                // default for chunked transfers.
2127                let chunk_size = pick_chunk_size(req.input.content_length.map(|n| n as u64));
2128                // v0.8.4 #73 M2: pass the request's Content-Length so
2129                // streaming_compress_to_frames can fail-fast on a mid-PUT
2130                // truncation (client disconnect after sending half the
2131                // body). `None` is the chunked-Transfer-Encoding case
2132                // where the upstream genuinely doesn't know the size and
2133                // the backend's framing layer is the only truncation
2134                // signal we have.
2135                let expected_input_size =
2136                    req.input.content_length.and_then(|n| u64::try_from(n).ok());
2137                let (body, manifest) = streaming_compress_to_frames(
2138                    chained,
2139                    Arc::clone(&self.registry),
2140                    kind,
2141                    chunk_size,
2142                    expected_input_size,
2143                )
2144                .await
2145                .map_err(|e| match e {
2146                    s4_codec::CodecError::TruncatedStream { expected, got } => {
2147                        // 400 IncompleteBody: client advertised N bytes
2148                        // but disconnected after `got`. Mirrors AWS S3's
2149                        // canonical error code for the same shape so SDK
2150                        // retries kick in instead of treating the PUT as
2151                        // a successful upload of a half-body.
2152                        S3Error::with_message(
2153                            S3ErrorCode::IncompleteBody,
2154                            format!("PUT body truncated: expected {expected} bytes, got {got}"),
2155                        )
2156                    }
2157                    other => internal("streaming framed compress")(other),
2158                })?;
2159                (body, manifest, true)
2160            } else {
2161                // GPU codec 等で streaming-aware でないものは bytes-buffered path
2162                // (raw 圧縮 bytes、framed なし — back-compat 互換 path)
2163                let bytes = collect_with_sample(sample, rest_stream, self.max_body_bytes)
2164                    .await
2165                    .map_err(internal("collect put body (buffered path)"))?;
2166                debug!(
2167                    bucket = ?req.input.bucket,
2168                    key = ?req.input.key,
2169                    bytes = bytes.len(),
2170                    codec = kind.as_str(),
2171                    path = "buffered",
2172                    "S4 put_object: compressing (buffered, raw blob)"
2173                );
2174                // v0.8 #55: telemetry-returning compress so we can stamp
2175                // GPU-pipeline Prometheus metrics (`s4_gpu_compress_seconds`,
2176                // throughput gauge, OOM counter) for nvcomp / dietgpu codecs.
2177                // CPU codecs come back with `gpu_seconds = None` and the
2178                // stamp helper short-circuits — no extra cost on CPU path.
2179                let (compress_res, tel) = self.registry.compress_with_telemetry(bytes, kind).await;
2180                stamp_gpu_compress_telemetry(&tel);
2181                let (body, m) = compress_res.map_err(internal("registry compress"))?;
2182                (body, m, false)
2183            };
2184
2185            write_manifest(&mut req.input.metadata, &manifest);
2186            if is_framed {
2187                // v0.2 #4: framed body であることを GET 側に伝える meta flag。
2188                req.input
2189                    .metadata
2190                    .get_or_insert_with(Default::default)
2191                    .insert(META_FRAMED.into(), "true".into());
2192            }
2193            // 重要: content_length を圧縮後サイズで更新する。
2194            // これを忘れると下流 (aws-sdk-s3 → S3) が宣言サイズ分の bytes を
2195            // 待ち続けて RequestTimeout で失敗する (S3 仕様)。
2196            req.input.content_length = Some(compressed.len() as i64);
2197            // body を書き換えたので、客側が送ってきた original body 用の
2198            // checksum / MD5 ヘッダは無効化する (そのまま転送すると下流 S3 が
2199            // XAmzContentChecksumMismatch を返す)。S4 自身の整合性は
2200            // ChunkManifest.crc32c で担保している。
2201            req.input.checksum_algorithm = None;
2202            req.input.checksum_crc32 = None;
2203            req.input.checksum_crc32c = None;
2204            req.input.checksum_crc64nvme = None;
2205            req.input.checksum_sha1 = None;
2206            req.input.checksum_sha256 = None;
2207            req.input.content_md5 = None;
2208            let original_size = manifest.original_size;
2209            let compressed_size = manifest.compressed_size;
2210            let codec_label = manifest.codec.as_str();
2211            // framed body は GET 側で sidecar partial-fetch を効かせるため
2212            // build_index_from_body で sidecar を組み立てて backend に PUT する。
2213            let sidecar_index = if is_framed {
2214                s4_codec::index::build_index_from_body(&compressed).ok()
2215            } else {
2216                None
2217            };
2218            // v0.4 #21 / v0.5 #29 / v0.5 #27: encrypt-after-compress.
2219            // Precedence:
2220            //   - SSE-C headers present → per-request customer key (S4E3)
2221            //   - server-managed keyring configured → active key (S4E2)
2222            //   - neither → no encryption (raw compressed body)
2223            // The `s4-encrypted: aes-256-gcm` metadata flag is set in
2224            // both encrypted modes; the on-disk frame magic distinguishes
2225            // S4E1 / S4E2 / S4E3 so GET picks the right decrypt path.
2226            // v0.7 #48 BUG-2/3 fix: take() the SSE fields off req.input
2227            // so the encryption headers are NOT forwarded to the
2228            // backend. S4 owns the encrypt-then-store contract; if we
2229            // leave the headers in place, real S3-compat backends
2230            // (MinIO / AWS) try to apply their own SSE on top and
2231            // either reject (MinIO requires HTTPS for SSE-C) or fail
2232            // (MinIO has no KMS configured). MemoryBackend ignored
2233            // these so mock tests passed.
2234            let sse_c_alg = req.input.sse_customer_algorithm.take();
2235            let sse_c_key = req.input.sse_customer_key.take();
2236            let sse_c_md5 = req.input.sse_customer_key_md5.take();
2237            let sse_header = req.input.server_side_encryption.take();
2238            let sse_kms_key = req.input.ssekms_key_id.take();
2239            let sse_c_material = extract_sse_c_material(&sse_c_alg, &sse_c_key, &sse_c_md5)?;
2240            // v0.5 #28: SSE-KMS request? Resolves to None unless the
2241            // request asks for `aws:kms` AND a key id is available
2242            // (explicit header or gateway default). When set, we'll
2243            // generate a per-object DEK below.
2244            let kms_key_id = extract_kms_key_id(
2245                &sse_header,
2246                &sse_kms_key,
2247                self.kms_default_key_id.as_deref(),
2248            );
2249            // v0.5 #32: in compliance-strict mode, every PUT must
2250            // declare SSE — either client-supplied (SSE-C), KMS, or by
2251            // virtue of a server-side keyring being configured (which
2252            // applies SSE-S4 to every PUT automatically). Requests that
2253            // would otherwise land as plain compressed bytes are
2254            // rejected with 400 InvalidRequest.
2255            if self.compliance_strict
2256                && sse_c_material.is_none()
2257                && kms_key_id.is_none()
2258                && self.sse_keyring.is_none()
2259                && sse_header.as_ref().map(|s| s.as_str()) != Some(ServerSideEncryption::AES256)
2260            {
2261                return Err(S3Error::with_message(
2262                    S3ErrorCode::InvalidRequest,
2263                    "compliance-mode strict: PUT must include x-amz-server-side-encryption \
2264                     (AES256 or aws:kms) or x-amz-server-side-encryption-customer-* headers",
2265                ));
2266            }
2267            // SSE-C and SSE-KMS are mutually exclusive on a single PUT
2268            // (AWS S3 returns 400 InvalidArgument). SSE-C wins by spec.
2269            if sse_c_material.is_some() && kms_key_id.is_some() {
2270                return Err(S3Error::with_message(
2271                    S3ErrorCode::InvalidArgument,
2272                    "SSE-C and SSE-KMS cannot be used together on the same PUT",
2273                ));
2274            }
2275            // KMS path needs to call generate_dek().await before the
2276            // body_to_send branch; capture the result here.
2277            //
2278            // v0.8.1 #58: the plaintext DEK lives in three places
2279            // during one PUT:
2280            //
2281            //   1. The `Zeroizing<Vec<u8>>` returned by `generate_dek`
2282            //      — wiped when the binding `dek` falls out of scope at
2283            //      the end of this `if`-arm.
2284            //   2. The stack `[u8; 32]` we copy into for `SseSource::Kms`
2285            //      — wrapped in `Zeroizing<[u8; 32]>` so it's wiped when
2286            //      the outer `kms_wrap` `Option` is dropped at the end
2287            //      of `put_object`.
2288            //   3. AES-GCM internal key state inside the `aes-gcm`
2289            //      crate during `encrypt_with_source` — out of scope
2290            //      for this fix; tracked separately in v0.8.2.
2291            let kms_wrap: Option<(zeroize::Zeroizing<[u8; 32]>, crate::kms::WrappedDek)> =
2292                if let Some(ref key_id) = kms_key_id {
2293                    let kms = self.kms.as_ref().ok_or_else(|| {
2294                    S3Error::with_message(
2295                        S3ErrorCode::InvalidRequest,
2296                        "SSE-KMS requested but no --kms-local-dir / --kms-aws-region is configured on this gateway",
2297                    )
2298                })?;
2299                    // `dek` is `Zeroizing<Vec<u8>>`; deref + slice access
2300                    // works unchanged via `Deref<Target=Vec<u8>>`.
2301                    let (dek, wrapped) = kms.generate_dek(key_id).await.map_err(kms_error_to_s3)?;
2302                    if dek.len() != 32 {
2303                        return Err(S3Error::with_message(
2304                            S3ErrorCode::InternalError,
2305                            format!(
2306                                "KMS backend returned a DEK of {} bytes (expected 32)",
2307                                dek.len()
2308                            ),
2309                        ));
2310                    }
2311                    let mut dek_arr: zeroize::Zeroizing<[u8; 32]> =
2312                        zeroize::Zeroizing::new([0u8; 32]);
2313                    dek_arr.copy_from_slice(&dek);
2314                    // `dek` (the `Zeroizing<Vec<u8>>`) is dropped at the
2315                    // end of this scope, wiping the heap allocation.
2316                    Some((dek_arr, wrapped))
2317                } else {
2318                    None
2319                };
2320            // v0.7 #48 BUG-4 fix: stamp the SSE *type* into metadata
2321            // alongside `s4-encrypted` so HEAD (which doesn't fetch the
2322            // body) can echo the correct `x-amz-server-side-encryption`
2323            // value. Without this, HEAD on an SSE-KMS object would not
2324            // echo `aws:kms` because the frame magic is only available
2325            // on the body (which HEAD doesn't read).
2326            let body_to_send = if let Some(ref m) = sse_c_material {
2327                let meta = req.input.metadata.get_or_insert_with(Default::default);
2328                meta.insert("s4-encrypted".into(), "aes-256-gcm".into());
2329                meta.insert("s4-sse-type".into(), "AES256".into());
2330                meta.insert(
2331                    "s4-sse-c-key-md5".into(),
2332                    base64::engine::general_purpose::STANDARD.encode(m.key_md5),
2333                );
2334                crate::sse::encrypt_with_source(
2335                    &compressed,
2336                    crate::sse::SseSource::CustomerKey {
2337                        key: &m.key,
2338                        key_md5: &m.key_md5,
2339                    },
2340                )
2341            } else if let Some((ref dek, ref wrapped)) = kms_wrap {
2342                let meta = req.input.metadata.get_or_insert_with(Default::default);
2343                meta.insert("s4-encrypted".into(), "aes-256-gcm".into());
2344                meta.insert("s4-sse-type".into(), "aws:kms".into());
2345                meta.insert("s4-sse-kms-key-id".into(), wrapped.key_id.clone());
2346                // v0.8.1 #58: `dek` is `&Zeroizing<[u8; 32]>`; `SseSource::Kms`
2347                // wants `&[u8; 32]`. Rust auto-derefs `&Zeroizing<T>` to
2348                // `&T` here via `Deref<Target=T>`, so the binding picks
2349                // up the inner array reference without copying. The array
2350                // stays in the `Zeroizing` wrapper that owns it and gets
2351                // wiped when `kms_wrap` drops at the end of `put_object`.
2352                let dek_ref: &[u8; 32] = dek;
2353                crate::sse::encrypt_with_source(
2354                    &compressed,
2355                    crate::sse::SseSource::Kms {
2356                        dek: dek_ref,
2357                        wrapped,
2358                    },
2359                )
2360            } else if let Some(keyring) = self.sse_keyring.as_ref() {
2361                // SSE-S4 is server-driven transparent encryption; the
2362                // client didn't ask for SSE. We stamp `s4-encrypted`
2363                // (internal flag the GET path needs) but deliberately
2364                // do NOT stamp `s4-sse-type` — that lights up the HEAD
2365                // echo of `x-amz-server-side-encryption: AES256`,
2366                // which would falsely advertise AWS-style SSE-S3
2367                // semantics the operator didn't request.
2368                let meta = req.input.metadata.get_or_insert_with(Default::default);
2369                meta.insert("s4-encrypted".into(), "aes-256-gcm".into());
2370                // v0.8 #52: when `--sse-chunk-size > 0` is configured,
2371                // emit the chunked S4E5 frame so the matching GET can
2372                // stream-decrypt instead of buffering 5 GiB before
2373                // emitting a byte. Falls back to the buffered S4E2
2374                // frame at chunk_size=0 (default) so existing
2375                // deployments are bit-for-bit unchanged.
2376                if self.sse_chunk_size > 0 {
2377                    crate::sse::encrypt_v2_chunked(&compressed, keyring, self.sse_chunk_size)
2378                        .map_err(|e| {
2379                            S3Error::with_message(
2380                                S3ErrorCode::InternalError,
2381                                format!("SSE-S4 chunked encrypt failed: {e}"),
2382                            )
2383                        })?
2384                } else {
2385                    crate::sse::encrypt_v2(&compressed, keyring)
2386                }
2387            } else {
2388                compressed.clone()
2389            };
2390            // v0.6 #40: capture the about-to-be-sent body + metadata so
2391            // the replication dispatcher (run after the source PUT
2392            // succeeds) can hand the same backend bytes to the
2393            // destination bucket. `Bytes` clone is cheap (refcounted).
2394            let replication_body = body_to_send.clone();
2395            let replication_metadata = req.input.metadata.clone();
2396            // v0.7 #48 BUG-1 fix: SSE encryption (S4E1/E2/E3/E4 frames)
2397            // makes the body longer than the post-compression bytes
2398            // (header + nonce + tag overhead). The earlier
2399            // content_length stamp at compressed.len() is now stale, so
2400            // re-stamp from the actual bytes about to be sent or the
2401            // backend (real S3 / MinIO) rejects with
2402            // `StreamLengthMismatch`. MemoryBackend never validated
2403            // this, which is why mock-only tests passed.
2404            req.input.content_length = Some(body_to_send.len() as i64);
2405            req.input.body = Some(bytes_to_blob(body_to_send));
2406            // v0.5 #34: pre-allocate a version-id when the bucket is
2407            // Enabled, then redirect the backend storage key to the
2408            // shadow path so older versions survive newer PUTs.
2409            // Suspended / Unversioned buckets keep using the plain
2410            // `<key>` (S3 spec: Suspended overwrites the same backend
2411            // object). Pre-allocation (instead of recording after PUT)
2412            // ensures the shadow key + the response's
2413            // `x-amz-version-id` use the same vid.
2414            let pending_version: Option<crate::versioning::PutOutcome> = self
2415                .versioning
2416                .as_ref()
2417                .map(|mgr| mgr.state(&put_bucket))
2418                .map(|state| match state {
2419                    crate::versioning::VersioningState::Enabled => crate::versioning::PutOutcome {
2420                        version_id: crate::versioning::VersioningManager::new_version_id(),
2421                        versioned_response: true,
2422                    },
2423                    crate::versioning::VersioningState::Suspended
2424                    | crate::versioning::VersioningState::Unversioned => {
2425                        crate::versioning::PutOutcome {
2426                            version_id: crate::versioning::NULL_VERSION_ID.to_owned(),
2427                            versioned_response: false,
2428                        }
2429                    }
2430                });
2431            if let Some(ref pv) = pending_version
2432                && pv.versioned_response
2433            {
2434                req.input.key = versioned_shadow_key(&put_key, &pv.version_id);
2435            }
2436            // v0.8.4 #73 H-2: capture the to-be-stored body length BEFORE
2437            // the move into `req.input` is consumed by the backend call.
2438            // The sidecar's `source_compressed_size` is checked against
2439            // the live HEAD `Content-Length` on Range GET to detect a
2440            // backend-side mutation.
2441            let backend_object_size = req.input.content_length.and_then(|n| u64::try_from(n).ok());
2442            let mut backend_resp = self.backend.put_object(req).await;
2443            if let Some(mut idx) = sidecar_index
2444                && let Ok(ref resp) = backend_resp
2445                && idx.entries.len() > 1
2446            {
2447                // 1 chunk しかない (small object) なら sidecar は意味がない (=
2448                // partial fetch しても full body と同じ範囲) ので省略。
2449                // Sidecar は user-visible key で書く (latest version の
2450                // partial fetch path 用)。Old versions の Range GET は今 task
2451                // の scope 外 (full read fallback でも意味的には正しい)。
2452                //
2453                // v0.8.4 #73 H-2: stamp the version-binding fields the
2454                // GET path needs to detect a stale / attacker-written
2455                // sidecar. ETag comes from the backend's PUT response —
2456                // when missing (some backends don't return an ETag) we
2457                // synthesize a CRC-derived stable identifier so the
2458                // sidecar still binds to *something*; the GET HEAD will
2459                // see the same backend ETag (None vs None) and treat the
2460                // pair as consistent.
2461                let source_etag = resp.output.e_tag.as_ref().map(|t| t.value().to_string());
2462                idx.source_etag = source_etag;
2463                idx.source_compressed_size = backend_object_size;
2464                self.write_sidecar(&put_bucket, &put_key, &idx).await;
2465            }
2466            // v0.5 #34: commit the new version into the manager only on
2467            // backend success. Use the pre-allocated vid so the response
2468            // header and the chain entry agree.
2469            if let (Some(mgr), Some(pv), Ok(resp)) = (
2470                self.versioning.as_ref(),
2471                pending_version.as_ref(),
2472                backend_resp.as_mut(),
2473            ) {
2474                let etag = resp
2475                    .output
2476                    .e_tag
2477                    .clone()
2478                    .map(ETag::into_value)
2479                    .unwrap_or_else(|| format!("\"crc32c-{}\"", manifest.crc32c));
2480                let now = chrono::Utc::now();
2481                mgr.commit_put_with_version(
2482                    &put_bucket,
2483                    &put_key,
2484                    crate::versioning::VersionEntry {
2485                        version_id: pv.version_id.clone(),
2486                        etag,
2487                        size: original_size,
2488                        is_delete_marker: false,
2489                        created_at: now,
2490                    },
2491                );
2492                if pv.versioned_response {
2493                    resp.output.version_id = Some(pv.version_id.clone());
2494                }
2495            }
2496            // v0.5 #27: AWS S3 echoes the SSE-C headers back on success
2497            // so the client knows the server actually applied the
2498            // requested algorithm and which key fingerprint matched.
2499            if let (Some(m), Ok(resp)) = (sse_c_material.as_ref(), backend_resp.as_mut()) {
2500                resp.output.sse_customer_algorithm = Some(crate::sse::SSE_C_ALGORITHM.into());
2501                resp.output.sse_customer_key_md5 =
2502                    Some(base64::engine::general_purpose::STANDARD.encode(m.key_md5));
2503            }
2504            // v0.5 #28: SSE-KMS echo — `aws:kms` + the canonical key id
2505            // the backend returned (AWS KMS returns the ARN even when
2506            // the request used an alias).
2507            if let (Some((_, wrapped)), Ok(resp)) = (kms_wrap.as_ref(), backend_resp.as_mut()) {
2508                resp.output.server_side_encryption = Some(ServerSideEncryption::from_static(
2509                    ServerSideEncryption::AWS_KMS,
2510                ));
2511                resp.output.ssekms_key_id = Some(wrapped.key_id.clone());
2512            }
2513            // v0.5 #30: persist any per-PUT explicit retention / legal
2514            // hold the client supplied, then auto-apply the bucket
2515            // default (no-op when state is already populated). The
2516            // explicit fields take precedence — the bucket-default
2517            // helper bails out as soon as it sees any retention.
2518            if let (Some(mgr), Ok(_)) = (self.object_lock.as_ref(), backend_resp.as_ref()) {
2519                if explicit_lock_mode.is_some()
2520                    || explicit_retain_until.is_some()
2521                    || explicit_legal_hold_on.is_some()
2522                {
2523                    let mut state = mgr.get(&put_bucket, &put_key).unwrap_or_default();
2524                    if let Some(m) = explicit_lock_mode {
2525                        state.mode = Some(m);
2526                    }
2527                    if let Some(u) = explicit_retain_until {
2528                        state.retain_until = Some(u);
2529                    }
2530                    if let Some(lh) = explicit_legal_hold_on {
2531                        state.legal_hold_on = lh;
2532                    }
2533                    mgr.set(&put_bucket, &put_key, state);
2534                }
2535                mgr.apply_default_on_put(&put_bucket, &put_key, chrono::Utc::now());
2536            }
2537            let _ = (original_size, compressed_size); // mute unused warnings
2538            let elapsed = put_start.elapsed();
2539            crate::metrics::record_put(
2540                codec_label,
2541                original_size,
2542                compressed_size,
2543                elapsed.as_secs_f64(),
2544                backend_resp.is_ok(),
2545            );
2546            // v0.4 #20: structured access-log entry (best-effort).
2547            self.record_access(
2548                access_preamble,
2549                "REST.PUT.OBJECT",
2550                &put_bucket,
2551                Some(&put_key),
2552                if backend_resp.is_ok() { 200 } else { 500 },
2553                compressed_size,
2554                original_size,
2555                elapsed.as_millis() as u64,
2556                backend_resp.as_ref().err().map(|e| e.code().as_str()),
2557            )
2558            .await;
2559            info!(
2560                op = "put_object",
2561                bucket = %put_bucket,
2562                key = %put_key,
2563                codec = codec_label,
2564                bytes_in = original_size,
2565                bytes_out = compressed_size,
2566                ratio = format!(
2567                    "{:.3}",
2568                    if original_size == 0 { 1.0 } else { compressed_size as f64 / original_size as f64 }
2569                ),
2570                latency_ms = elapsed.as_millis() as u64,
2571                ok = backend_resp.is_ok(),
2572                "S4 put completed"
2573            );
2574            // v0.6 #35: fire bucket-notification destinations (best-effort,
2575            // detached). Skipped when no manager is attached or when the
2576            // bucket has no rule matching `s3:ObjectCreated:Put` for this
2577            // key.
2578            if backend_resp.is_ok()
2579                && let Some(mgr) = self.notifications.as_ref()
2580            {
2581                let dests = mgr.match_destinations(
2582                    &put_bucket,
2583                    &crate::notifications::EventType::ObjectCreatedPut,
2584                    &put_key,
2585                );
2586                if !dests.is_empty() {
2587                    let etag = backend_resp
2588                        .as_ref()
2589                        .ok()
2590                        .and_then(|r| r.output.e_tag.clone())
2591                        .map(ETag::into_value);
2592                    let version_id = pending_version
2593                        .as_ref()
2594                        .filter(|pv| pv.versioned_response)
2595                        .map(|pv| pv.version_id.clone());
2596                    tokio::spawn(crate::notifications::dispatch_event(
2597                        Arc::clone(mgr),
2598                        put_bucket.clone(),
2599                        put_key.clone(),
2600                        crate::notifications::EventType::ObjectCreatedPut,
2601                        Some(original_size),
2602                        etag,
2603                        version_id,
2604                        format!("S4-{}", uuid::Uuid::new_v4()),
2605                    ));
2606                }
2607            }
2608            // v0.6 #39: persist parsed `x-amz-tagging` tags into the
2609            // tagging manager on a successful PUT. AWS PutObject's
2610            // tagging is a full-replace operation (not a merge), so
2611            // any pre-existing entry for `(bucket, key)` is overwritten.
2612            if backend_resp.is_ok()
2613                && let (Some(mgr), Some(tags)) = (self.tagging.as_ref(), request_tags.clone())
2614            {
2615                mgr.put_object_tags(&put_bucket, &put_key, tags);
2616            }
2617            // v0.6 #40: cross-bucket replication fire-point. On
2618            // successful source PUT, consult the replication manager;
2619            // when an enabled rule matches, mark the source key
2620            // `Pending` and spawn a detached task that PUTs the same
2621            // backend bytes + metadata to the rule's destination
2622            // bucket. The dispatcher itself records `Completed` /
2623            // `Failed` and bumps the drop counter on retry-budget
2624            // exhaustion.
2625            self.spawn_replication_if_matched(
2626                &put_bucket,
2627                &put_key,
2628                &request_tags,
2629                &replication_body,
2630                &replication_metadata,
2631                backend_resp.is_ok(),
2632                pending_version.as_ref(),
2633            );
2634            return backend_resp;
2635        }
2636        // Body-less PUT (rare: zero-length object). Mirror the body-full
2637        // versioning hooks so list_object_versions / GET-by-version still see
2638        // empty-body objects in the chain.
2639        let pending_version: Option<crate::versioning::PutOutcome> = self
2640            .versioning
2641            .as_ref()
2642            .map(|mgr| mgr.state(&put_bucket))
2643            .map(|state| match state {
2644                crate::versioning::VersioningState::Enabled => crate::versioning::PutOutcome {
2645                    version_id: crate::versioning::VersioningManager::new_version_id(),
2646                    versioned_response: true,
2647                },
2648                _ => crate::versioning::PutOutcome {
2649                    version_id: crate::versioning::NULL_VERSION_ID.to_owned(),
2650                    versioned_response: false,
2651                },
2652            });
2653        if let Some(ref pv) = pending_version
2654            && pv.versioned_response
2655        {
2656            req.input.key = versioned_shadow_key(&put_key, &pv.version_id);
2657        }
2658        let mut backend_resp = self.backend.put_object(req).await;
2659        if let (Some(mgr), Some(pv), Ok(resp)) = (
2660            self.versioning.as_ref(),
2661            pending_version.as_ref(),
2662            backend_resp.as_mut(),
2663        ) {
2664            let etag = resp
2665                .output
2666                .e_tag
2667                .clone()
2668                .map(ETag::into_value)
2669                .unwrap_or_default();
2670            let now = chrono::Utc::now();
2671            mgr.commit_put_with_version(
2672                &put_bucket,
2673                &put_key,
2674                crate::versioning::VersionEntry {
2675                    version_id: pv.version_id.clone(),
2676                    etag,
2677                    size: 0,
2678                    is_delete_marker: false,
2679                    created_at: now,
2680                },
2681            );
2682            if pv.versioned_response {
2683                resp.output.version_id = Some(pv.version_id.clone());
2684            }
2685        }
2686        // v0.5 #30: same explicit-then-default lock-state commit as the
2687        // body-bearing branch above, so a zero-length PUT also picks up
2688        // bucket-default retention.
2689        if let (Some(mgr), Ok(_)) = (self.object_lock.as_ref(), backend_resp.as_ref()) {
2690            if explicit_lock_mode.is_some()
2691                || explicit_retain_until.is_some()
2692                || explicit_legal_hold_on.is_some()
2693            {
2694                let mut state = mgr.get(&put_bucket, &put_key).unwrap_or_default();
2695                if let Some(m) = explicit_lock_mode {
2696                    state.mode = Some(m);
2697                }
2698                if let Some(u) = explicit_retain_until {
2699                    state.retain_until = Some(u);
2700                }
2701                if let Some(lh) = explicit_legal_hold_on {
2702                    state.legal_hold_on = lh;
2703                }
2704                mgr.set(&put_bucket, &put_key, state);
2705            }
2706            mgr.apply_default_on_put(&put_bucket, &put_key, chrono::Utc::now());
2707        }
2708        // v0.6 #35: same notification fire-point as the body-bearing PUT
2709        // branch above (zero-length objects still match `ObjectCreated:Put`
2710        // rules per the AWS event taxonomy).
2711        if backend_resp.is_ok()
2712            && let Some(mgr) = self.notifications.as_ref()
2713        {
2714            let dests = mgr.match_destinations(
2715                &put_bucket,
2716                &crate::notifications::EventType::ObjectCreatedPut,
2717                &put_key,
2718            );
2719            if !dests.is_empty() {
2720                let etag = backend_resp
2721                    .as_ref()
2722                    .ok()
2723                    .and_then(|r| r.output.e_tag.clone())
2724                    .map(ETag::into_value);
2725                let version_id = pending_version
2726                    .as_ref()
2727                    .filter(|pv| pv.versioned_response)
2728                    .map(|pv| pv.version_id.clone());
2729                tokio::spawn(crate::notifications::dispatch_event(
2730                    Arc::clone(mgr),
2731                    put_bucket.clone(),
2732                    put_key.clone(),
2733                    crate::notifications::EventType::ObjectCreatedPut,
2734                    Some(0),
2735                    etag,
2736                    version_id,
2737                    format!("S4-{}", uuid::Uuid::new_v4()),
2738                ));
2739            }
2740        }
2741        // v0.6 #39: persist parsed `x-amz-tagging` for the body-less
2742        // (zero-length) PUT branch too — same shape as the body-bearing
2743        // branch above.
2744        if backend_resp.is_ok()
2745            && let (Some(mgr), Some(tags)) = (self.tagging.as_ref(), request_tags.clone())
2746        {
2747            mgr.put_object_tags(&put_bucket, &put_key, tags);
2748        }
2749        // v0.6 #40: cross-bucket replication for the zero-length PUT
2750        // branch — same shape as the body-bearing branch above.
2751        // v0.8.2 #61: pass `pending_version` so a versioned source's
2752        // destination receives the same shadow-key path.
2753        self.spawn_replication_if_matched(
2754            &put_bucket,
2755            &put_key,
2756            &request_tags,
2757            &bytes::Bytes::new(),
2758            &None,
2759            backend_resp.is_ok(),
2760            pending_version.as_ref(),
2761        );
2762        backend_resp
2763    }
2764
2765    // === 圧縮を解く path (GET) ===
2766    #[tracing::instrument(
2767        name = "s4.get_object",
2768        skip(self, req),
2769        fields(bucket = %req.input.bucket, key = %req.input.key, codec, bytes_out, range, path)
2770    )]
2771    async fn get_object(
2772        &self,
2773        mut req: S3Request<GetObjectInput>,
2774    ) -> S3Result<S3Response<GetObjectOutput>> {
2775        let get_start = Instant::now();
2776        let get_bucket = req.input.bucket.clone();
2777        let get_key = req.input.key.clone();
2778        self.enforce_rate_limit(&req, &get_bucket)?;
2779        self.enforce_policy(&req, "s3:GetObject", &get_bucket, Some(&get_key))?;
2780        // Range request の事前検出 (decompress 後 slice する path に使う)。
2781        let range_request = req.input.range.take();
2782        // v0.5 #27: pull SSE-C material from the input headers before
2783        // the request is moved into the backend. A header parse error
2784        // fails fast (no body fetch). The material is consumed below
2785        // when decrypting an S4E3-framed body; the SSE-C headers on
2786        // `req.input` are cleared so the backend doesn't see them.
2787        let sse_c_alg = req.input.sse_customer_algorithm.take();
2788        let sse_c_key = req.input.sse_customer_key.take();
2789        let sse_c_md5 = req.input.sse_customer_key_md5.take();
2790        let get_sse_c_material = extract_sse_c_material(&sse_c_alg, &sse_c_key, &sse_c_md5)?;
2791
2792        // v0.5 #34: route the GET through the VersioningManager when
2793        // attached AND the bucket is in a versioning-aware state.
2794        // Resolves which version to fetch (explicit `?versionId=` query
2795        // param vs. chain latest), translates a delete-marker into 404
2796        // NoSuchKey, and rewrites the backend storage key to the shadow
2797        // path (`<key>.__s4ver__/<vid>`) for non-null Enabled-bucket
2798        // versions. `resolved_version_id` is stamped onto the response
2799        // so clients see a coherent `x-amz-version-id` header.
2800        //
2801        // When the bucket is Unversioned (or no manager attached), the
2802        // chain-resolution step is skipped and the request flows
2803        // through the existing single-key path unchanged.
2804        let resolved_version_id: Option<String> = match self.versioning.as_ref() {
2805            Some(mgr)
2806                if mgr.state(&get_bucket) != crate::versioning::VersioningState::Unversioned =>
2807            {
2808                let req_vid = req.input.version_id.take();
2809                let entry = match req_vid.as_deref() {
2810                    Some(vid) => {
2811                        mgr.lookup_version(&get_bucket, &get_key, vid)
2812                            .ok_or_else(|| {
2813                                S3Error::with_message(
2814                                    S3ErrorCode::NoSuchVersion,
2815                                    format!("no such version: {vid}"),
2816                                )
2817                            })?
2818                    }
2819                    None => mgr.lookup_latest(&get_bucket, &get_key).ok_or_else(|| {
2820                        S3Error::with_message(
2821                            S3ErrorCode::NoSuchKey,
2822                            format!("no such key: {get_key}"),
2823                        )
2824                    })?,
2825                };
2826                if entry.is_delete_marker {
2827                    // S3 spec: GET without versionId on a
2828                    // delete-marker latest → 404 NoSuchKey + the
2829                    // response carries `x-amz-delete-marker: true`.
2830                    // GET with explicit versionId pointing at a delete
2831                    // marker → 405 MethodNotAllowed; we surface
2832                    // NoSuchKey here for both since s3s collapses them
2833                    // into the same not-found error path.
2834                    return Err(S3Error::with_message(
2835                        S3ErrorCode::NoSuchKey,
2836                        format!("delete marker is the current version of {get_key}"),
2837                    ));
2838                }
2839                if entry.version_id != crate::versioning::NULL_VERSION_ID {
2840                    req.input.key = versioned_shadow_key(&get_key, &entry.version_id);
2841                }
2842                Some(entry.version_id)
2843            }
2844            _ => None,
2845        };
2846
2847        // ====== Range GET の partial-fetch fast path (sidecar index 利用) ======
2848        // sidecar `<key>.s4index` が存在し、multipart-framed object であれば
2849        // 必要 frame だけを backend に Range GET し帯域節約する。
2850        //
2851        // v0.8.4 #73 H-2: BEFORE trusting the sidecar's frame offsets,
2852        // verify the source object hasn't been overwritten / mutated since
2853        // the sidecar was stamped. The sidecar carries the backend ETag
2854        // captured at PUT time (`source_etag`); a HEAD against the current
2855        // backend object tells us the live ETag. If they disagree we treat
2856        // the sidecar as stale and fall through to the full-GET path —
2857        // returning the wrong frames for a Range request would surface as
2858        // a CRC mismatch deeper in the stack but would also potentially
2859        // disclose unrelated frames if a hostile operator wrote the
2860        // sidecar themselves. Fail-open to "full read" is the safe default.
2861        //
2862        // Legacy v1 sidecars (no `source_etag` populated) keep the old
2863        // best-effort behaviour so existing on-disk indexes don't suddenly
2864        // start missing the partial-fetch path.
2865        if let Some(ref r) = range_request
2866            && let Some(index) = self.read_sidecar(&req.input.bucket, &req.input.key).await
2867            && self
2868                .sidecar_version_binding_ok(&req.input.bucket, &req.input.key, &index)
2869                .await
2870        {
2871            let total = index.total_original_size();
2872            let (start, end_exclusive) = match resolve_range(r, total) {
2873                Ok(v) => v,
2874                Err(e) => {
2875                    return Err(S3Error::with_message(S3ErrorCode::InvalidRange, e));
2876                }
2877            };
2878            if let Some(plan) = index.lookup_range(start, end_exclusive) {
2879                return self
2880                    .partial_range_get(&req, plan, start, end_exclusive, total, get_start)
2881                    .await;
2882            }
2883        }
2884        let mut resp = self.backend.get_object(req).await?;
2885        // v0.5 #34: stamp the resolved version-id so the client sees a
2886        // coherent `x-amz-version-id` header (only for chains owned by
2887        // the manager — Unversioned buckets / no-manager paths never
2888        // set this).
2889        if let Some(ref vid) = resolved_version_id {
2890            resp.output.version_id = Some(vid.clone());
2891        }
2892        let is_multipart = is_multipart_object(&resp.output.metadata);
2893        let is_framed_v2 = is_framed_v2_object(&resp.output.metadata);
2894        // v0.2 #4: framed-v2 single-PUT は多 frame parse が必要なので
2895        // multipart と同じ path に流す。
2896        let needs_frame_parse = is_multipart || is_framed_v2;
2897        let manifest_opt = extract_manifest(&resp.output.metadata);
2898
2899        if !needs_frame_parse && manifest_opt.is_none() {
2900            // S4 が書いていないオブジェクトは透過 (raw bucket pre-existing object 等)
2901            debug!("S4 get_object: object lacks s4-codec metadata, returning as-is");
2902            return Ok(resp);
2903        }
2904
2905        if let Some(blob) = resp.output.body.take() {
2906            // v0.4 #21 / v0.5 #27: if the object was stored under SSE
2907            // (metadata flag `s4-encrypted: aes-256-gcm`), decrypt
2908            // before any frame parse / streaming decompress. Encrypted
2909            // bodies are opaque to the codec; this also forces the
2910            // buffered path because AES-GCM needs the full body for tag
2911            // verify. SSE-C uses the per-request customer key, SSE-S4
2912            // falls back to the configured keyring.
2913            let blob = if is_sse_encrypted(&resp.output.metadata) {
2914                let body = collect_blob(blob, self.max_body_bytes)
2915                    .await
2916                    .map_err(internal("collect SSE-encrypted body"))?;
2917                // v0.5 #28: peek the frame magic to route the right
2918                // decrypt path. S4E4 means SSE-KMS — unwrap the DEK
2919                // through the KMS backend (async). S4E1/E2/E3 take
2920                // the sync path (keyring or customer key).
2921                //
2922                // v0.8 #52 (S4E5) / v0.8.1 #57 (S4E6): the chunked
2923                // SSE-S4 frames take the *streaming* path — we hand
2924                // the response body a per-chunk verify-and-emit
2925                // Stream so the client sees chunk 0 plaintext after
2926                // one chunk-worth of AES-GCM verify (vs. waiting
2927                // for the whole body's tag), and the gateway no
2928                // longer needs to materialize the full plaintext
2929                // in memory before responding. SSE-C is out of
2930                // scope for the chunked path (chunked S4E3 is a
2931                // follow-up), so this branch requires the SSE-S4
2932                // keyring to be wired and `get_sse_c_material` to
2933                // be absent — otherwise we surface a clear
2934                // misconfiguration error instead of silently
2935                // falling through to the buffered chunked path.
2936                if matches!(crate::sse::peek_magic(&body), Some("S4E5") | Some("S4E6"))
2937                    && get_sse_c_material.is_none()
2938                {
2939                    let keyring_arc = self.sse_keyring.clone().ok_or_else(|| {
2940                        S3Error::with_message(
2941                            S3ErrorCode::InvalidRequest,
2942                            "object is SSE-S4 encrypted (S4E5/S4E6) but no --sse-s4-key is configured on this gateway",
2943                        )
2944                    })?;
2945                    let body_len = body.len() as u64;
2946                    let stream = crate::sse::decrypt_chunked_stream(body, keyring_arc.as_ref());
2947                    // Stream is `'static` (the keyring borrow is
2948                    // consumed up front; the cipher lives inside
2949                    // the stream state — see decrypt_chunked_stream
2950                    // doc), so we can move it straight into a
2951                    // StreamingBlob without lifetime gymnastics.
2952                    use futures::StreamExt;
2953                    let mapped = stream.map(|r| {
2954                        r.map_err(|e| std::io::Error::other(format!("SSE-S4 chunked decrypt: {e}")))
2955                    });
2956                    use s3s::dto::StreamingBlob;
2957                    resp.output.body = Some(StreamingBlob::wrap(mapped));
2958                    // Plaintext content_length is unknown until all
2959                    // chunks have been verified; null it out so the
2960                    // ByteStream wrapper reports `unknown` to the
2961                    // HTTP layer (which then emits chunked transfer-
2962                    // encoding) rather than lying about the size.
2963                    resp.output.content_length = None;
2964                    // The backend's checksums + ETag describe the
2965                    // encrypted body (S4E5/S4E6 wire format), not
2966                    // the plaintext we're about to stream — clear them
2967                    // so the AWS SDK doesn't fail the GET with a
2968                    // ChecksumMismatch on a successful round-trip.
2969                    // Mirrors the streaming-zstd path at L1180-1185.
2970                    resp.output.checksum_crc32 = None;
2971                    resp.output.checksum_crc32c = None;
2972                    resp.output.checksum_crc64nvme = None;
2973                    resp.output.checksum_sha1 = None;
2974                    resp.output.checksum_sha256 = None;
2975                    resp.output.e_tag = None;
2976                    let elapsed = get_start.elapsed();
2977                    crate::metrics::record_get(
2978                        "sse-s4-chunked",
2979                        body_len,
2980                        body_len,
2981                        elapsed.as_secs_f64(),
2982                        true,
2983                    );
2984                    return Ok(resp);
2985                }
2986                let plain = match crate::sse::peek_magic(&body) {
2987                    Some("S4E4") => {
2988                        let kms = self.kms.as_ref().ok_or_else(|| {
2989                            S3Error::with_message(
2990                                S3ErrorCode::InvalidRequest,
2991                                "object is SSE-KMS encrypted but no --kms-local-dir / --kms-aws-region is configured on this gateway",
2992                            )
2993                        })?;
2994                        let kms_ref: &dyn crate::kms::KmsBackend = kms.as_ref();
2995                        crate::sse::decrypt_with_kms(&body, kms_ref)
2996                            .await
2997                            .map_err(|e| match e {
2998                                crate::sse::SseError::KmsBackend(k) => kms_error_to_s3(k),
2999                                other => S3Error::with_message(
3000                                    S3ErrorCode::InternalError,
3001                                    format!("SSE-KMS decrypt failed: {other}"),
3002                                ),
3003                            })?
3004                    }
3005                    _ => {
3006                        if let Some(ref m) = get_sse_c_material {
3007                            crate::sse::decrypt(
3008                                &body,
3009                                crate::sse::SseSource::CustomerKey {
3010                                    key: &m.key,
3011                                    key_md5: &m.key_md5,
3012                                },
3013                            )
3014                            .map_err(sse_c_error_to_s3)?
3015                        } else {
3016                            let keyring = self.sse_keyring.as_ref().ok_or_else(|| {
3017                                S3Error::with_message(
3018                                    S3ErrorCode::InvalidRequest,
3019                                    "object is SSE-S4 encrypted but no --sse-s4-key is configured on this gateway",
3020                                )
3021                            })?;
3022                            crate::sse::decrypt(&body, keyring).map_err(|e| {
3023                                S3Error::with_message(
3024                                    S3ErrorCode::InternalError,
3025                                    format!("SSE-S4 decrypt failed: {e}"),
3026                                )
3027                            })?
3028                        }
3029                    }
3030                };
3031                // v0.5 #28: parse out the on-disk wrapped DEK's key id
3032                // so the GET response can echo `x-amz-server-side-encryption-aws-kms-key-id`.
3033                if matches!(crate::sse::peek_magic(&body), Some("S4E4"))
3034                    && let Ok(hdr) = crate::sse::parse_s4e4_header(&body)
3035                {
3036                    resp.output.server_side_encryption = Some(ServerSideEncryption::from_static(
3037                        ServerSideEncryption::AWS_KMS,
3038                    ));
3039                    resp.output.ssekms_key_id = Some(hdr.key_id.to_string());
3040                }
3041                bytes_to_blob(plain)
3042            } else if let Some(ref m) = get_sse_c_material {
3043                // Client sent SSE-C headers for an unencrypted object —
3044                // mirror AWS S3's 400 InvalidRequest.
3045                let _ = m;
3046                return Err(sse_c_error_to_s3(
3047                    crate::sse::SseError::CustomerKeyUnexpected,
3048                ));
3049            } else {
3050                blob
3051            };
3052            // v0.5 #27: SSE-C echo on success — algorithm + key MD5
3053            // tell the client that the supplied key was the one used.
3054            if let Some(ref m) = get_sse_c_material {
3055                resp.output.sse_customer_algorithm = Some(crate::sse::SSE_C_ALGORITHM.into());
3056                resp.output.sse_customer_key_md5 =
3057                    Some(base64::engine::general_purpose::STANDARD.encode(m.key_md5));
3058            }
3059            // ====== Streaming fast path (CpuZstd, non-multipart, codec supports it) ======
3060            // 大規模 object (e.g. 5 GB) を memory に collect すると OOM するので、
3061            // codec が streaming-aware なら body を chunk-by-chunk で decompress して
3062            // 即座に client に流す。
3063            //
3064            // ただし Range request 時は streaming できない (slice するため total bytes
3065            // が必要) → buffered path に fall through。
3066            if range_request.is_none()
3067                && !needs_frame_parse
3068                && let Some(ref m) = manifest_opt
3069                && supports_streaming_decompress(m.codec)
3070                && m.codec == CodecKind::CpuZstd
3071            {
3072                // v0.8.4 #73 H-1: wrap the decompressor output in a
3073                // rolling-CRC32C verifier so a tampered ciphertext (or a
3074                // backend-side corruption that the zstd decoder happens
3075                // to "successfully" decode into wrong bytes) surfaces as
3076                // a streaming error tail at EOF instead of silently
3077                // delivering corrupt plaintext to the client. The wrap
3078                // is a pure pass-through during the body — no extra
3079                // buffering, TTFB unaffected — and the integrity
3080                // decision lands at the last chunk.
3081                let decompressed_blob = cpu_zstd_decompress_stream(blob);
3082                let verified_reader = Crc32cVerifyingReader::new(
3083                    blob_to_async_read(decompressed_blob),
3084                    m.crc32c,
3085                    m.original_size,
3086                );
3087                let verified_blob = async_read_to_blob(verified_reader);
3088                resp.output.content_length = Some(m.original_size as i64);
3089                resp.output.checksum_crc32 = None;
3090                resp.output.checksum_crc32c = None;
3091                resp.output.checksum_crc64nvme = None;
3092                resp.output.checksum_sha1 = None;
3093                resp.output.checksum_sha256 = None;
3094                resp.output.e_tag = None;
3095                resp.output.body = Some(verified_blob);
3096                let elapsed = get_start.elapsed();
3097                crate::metrics::record_get(
3098                    m.codec.as_str(),
3099                    m.compressed_size,
3100                    m.original_size,
3101                    elapsed.as_secs_f64(),
3102                    true,
3103                );
3104                info!(
3105                    op = "get_object",
3106                    bucket = %get_bucket,
3107                    key = %get_key,
3108                    codec = m.codec.as_str(),
3109                    bytes_in = m.compressed_size,
3110                    bytes_out = m.original_size,
3111                    path = "streaming",
3112                    setup_latency_ms = elapsed.as_millis() as u64,
3113                    "S4 get started (streaming)"
3114                );
3115                return Ok(resp);
3116            }
3117            // Passthrough: そのまま流す (Range なしの場合のみ streaming)
3118            if range_request.is_none()
3119                && !needs_frame_parse
3120                && let Some(ref m) = manifest_opt
3121                && m.codec == CodecKind::Passthrough
3122            {
3123                resp.output.content_length = Some(m.original_size as i64);
3124                resp.output.checksum_crc32 = None;
3125                resp.output.checksum_crc32c = None;
3126                resp.output.checksum_crc64nvme = None;
3127                resp.output.checksum_sha1 = None;
3128                resp.output.checksum_sha256 = None;
3129                resp.output.e_tag = None;
3130                resp.output.body = Some(blob);
3131                debug!("S4 get_object: passthrough streaming");
3132                return Ok(resp);
3133            }
3134
3135            // ====== Buffered slow path (multipart frame parser, GPU codecs) ======
3136            let bytes = collect_blob(blob, self.max_body_bytes)
3137                .await
3138                .map_err(internal("collect get body"))?;
3139
3140            let decompressed = if needs_frame_parse {
3141                // multipart objects と framed-v2 single-PUT objects は同じ
3142                // S4F2 frame 列なので decompress_multipart で統一処理
3143                self.decompress_multipart(bytes).await?
3144            } else {
3145                let manifest = manifest_opt.as_ref().expect("non-multipart guarded above");
3146                self.registry
3147                    .decompress(bytes, manifest)
3148                    .await
3149                    .map_err(internal("registry decompress"))?
3150            };
3151
3152            // Range request があれば slice。なければ full body を返す。
3153            let total_size = decompressed.len() as u64;
3154            let (final_bytes, status_override) = if let Some(r) = range_request.as_ref() {
3155                let (start, end) = resolve_range(r, total_size)
3156                    .map_err(|e| S3Error::with_message(S3ErrorCode::InvalidRange, e))?;
3157                let sliced = decompressed.slice(start as usize..end as usize);
3158                resp.output.content_range = Some(format!(
3159                    "bytes {start}-{}/{total_size}",
3160                    end.saturating_sub(1)
3161                ));
3162                (sliced, Some(http::StatusCode::PARTIAL_CONTENT))
3163            } else {
3164                (decompressed, None)
3165            };
3166            // 解凍後の真のサイズを返す (S3 client は content_length を信頼するので
3167            // 圧縮 size のままだと downstream が body を途中で切ってしまう)
3168            resp.output.content_length = Some(final_bytes.len() as i64);
3169            // 圧縮済 bytes の checksum を返すと AWS SDK 側で StreamingError
3170            // (ChecksumMismatch) になる。ETag も backend が返した「圧縮済 bytes の
3171            // MD5/checksum」なので意味的にズレる — クリアして S4 自身の crc32c
3172            // (manifest 内 / frame 内) で integrity を保証する設計にする。
3173            resp.output.checksum_crc32 = None;
3174            resp.output.checksum_crc32c = None;
3175            resp.output.checksum_crc64nvme = None;
3176            resp.output.checksum_sha1 = None;
3177            resp.output.checksum_sha256 = None;
3178            resp.output.e_tag = None;
3179            let returned_size = final_bytes.len() as u64;
3180            let codec_label = manifest_opt
3181                .as_ref()
3182                .map(|m| m.codec.as_str())
3183                .unwrap_or("multipart");
3184            resp.output.body = Some(bytes_to_blob(final_bytes));
3185            if let Some(status) = status_override {
3186                resp.status = Some(status);
3187            }
3188            let elapsed = get_start.elapsed();
3189            crate::metrics::record_get(codec_label, 0, returned_size, elapsed.as_secs_f64(), true);
3190            info!(
3191                op = "get_object",
3192                bucket = %get_bucket,
3193                key = %get_key,
3194                codec = codec_label,
3195                bytes_out = returned_size,
3196                total_object_size = total_size,
3197                range = range_request.is_some(),
3198                path = "buffered",
3199                latency_ms = elapsed.as_millis() as u64,
3200                "S4 get completed (buffered)"
3201            );
3202        }
3203        // v0.6 #40: echo the recorded `x-amz-replication-status` so
3204        // consumers can poll progress (PENDING / COMPLETED / FAILED).
3205        if let Some(mgr) = self.replication.as_ref()
3206            && let Some(status) = mgr.lookup_status(&get_bucket, &get_key)
3207        {
3208            resp.output.replication_status = Some(s3s::dto::ReplicationStatus::from(
3209                status.as_aws_str().to_owned(),
3210            ));
3211        }
3212        Ok(resp)
3213    }
3214
3215    // === passthrough delegations ===
    /// Pure passthrough: HeadBucket carries no object body, so the codec
    /// layer has nothing to intercept — delegate directly to the backend.
    async fn head_bucket(
        &self,
        req: S3Request<HeadBucketInput>,
    ) -> S3Result<S3Response<HeadBucketOutput>> {
        self.backend.head_bucket(req).await
    }
    /// Pure passthrough: bucket listings never contain object bytes or
    /// s4-* metadata, so no filtering or rewriting is needed here.
    async fn list_buckets(
        &self,
        req: S3Request<ListBucketsInput>,
    ) -> S3Result<S3Response<ListBucketsOutput>> {
        self.backend.list_buckets(req).await
    }
    /// Pure passthrough: bucket creation involves no object data, so the
    /// compression layer simply forwards the request.
    async fn create_bucket(
        &self,
        req: S3Request<CreateBucketInput>,
    ) -> S3Result<S3Response<CreateBucketOutput>> {
        self.backend.create_bucket(req).await
    }
    /// Pure passthrough. NOTE(review): sidecar / shadow-version objects
    /// inside the bucket are left for the backend's own emptiness check
    /// to reject — confirm that is the intended behaviour.
    async fn delete_bucket(
        &self,
        req: S3Request<DeleteBucketInput>,
    ) -> S3Result<S3Response<DeleteBucketOutput>> {
        self.backend.delete_bucket(req).await
    }
3240    async fn head_object(
3241        &self,
3242        req: S3Request<HeadObjectInput>,
3243    ) -> S3Result<S3Response<HeadObjectOutput>> {
3244        // v0.6 #40: capture bucket/key before req is consumed so the
3245        // replication-status echo can look the entry up.
3246        let head_bucket = req.input.bucket.clone();
3247        let head_key = req.input.key.clone();
3248        let mut resp = self.backend.head_object(req).await?;
3249        if let Some(manifest) = extract_manifest(&resp.output.metadata) {
3250            // 客側には decompress 後の意味のある content_length / checksum を返す。
3251            // backend が返す圧縮済 bytes の checksum / e_tag は意味が違うため除去
3252            // (S4 は manifest 内の crc32c で integrity を担保する)。
3253            resp.output.content_length = Some(manifest.original_size as i64);
3254            resp.output.checksum_crc32 = None;
3255            resp.output.checksum_crc32c = None;
3256            resp.output.checksum_crc64nvme = None;
3257            resp.output.checksum_sha1 = None;
3258            resp.output.checksum_sha256 = None;
3259            resp.output.e_tag = None;
3260        }
3261        // v0.6 #40: echo `x-amz-replication-status` (PENDING / COMPLETED
3262        // / FAILED) so consumers can poll progress without a GET.
3263        if let Some(mgr) = self.replication.as_ref()
3264            && let Some(status) = mgr.lookup_status(&head_bucket, &head_key)
3265        {
3266            resp.output.replication_status = Some(s3s::dto::ReplicationStatus::from(
3267                status.as_aws_str().to_owned(),
3268            ));
3269        }
3270        // v0.7 #48 BUG-4 fix: HEAD must echo SSE indicators so SDKs
3271        // and pipelines see the same posture they got on PUT. The PUT
3272        // path stamps `s4-sse-type` metadata for exactly this — HEAD
3273        // doesn't fetch the body, so it can't peek frame magic.
3274        if let Some(meta) = resp.output.metadata.as_ref()
3275            && let Some(sse_type) = meta.get("s4-sse-type")
3276        {
3277            {
3278                match sse_type.as_str() {
3279                    "aws:kms" => {
3280                        resp.output.server_side_encryption = Some(
3281                            ServerSideEncryption::from_static(ServerSideEncryption::AWS_KMS),
3282                        );
3283                        if let Some(key_id) = meta.get("s4-sse-kms-key-id") {
3284                            resp.output.ssekms_key_id = Some(key_id.clone());
3285                        }
3286                    }
3287                    _ => {
3288                        resp.output.server_side_encryption = Some(
3289                            ServerSideEncryption::from_static(ServerSideEncryption::AES256),
3290                        );
3291                        if let Some(md5) = meta.get("s4-sse-c-key-md5") {
3292                            resp.output.sse_customer_algorithm =
3293                                Some(crate::sse::SSE_C_ALGORITHM.into());
3294                            resp.output.sse_customer_key_md5 = Some(md5.clone());
3295                        }
3296                    }
3297                }
3298            }
3299        }
3300        Ok(resp)
3301    }
    /// DELETE with the full S4 policy stack. The checks run in a fixed
    /// order and each may reject the request before any state changes:
    /// rate-limit → bucket policy → MFA-Delete → WORM object lock →
    /// versioning router (returns early on versioned buckets) →
    /// legacy physical delete (+ lock/tag/sidecar cleanup + event).
    async fn delete_object(
        &self,
        mut req: S3Request<DeleteObjectInput>,
    ) -> S3Result<S3Response<DeleteObjectOutput>> {
        // Capture bucket/key up front: `req` is consumed by the backend
        // call on the legacy path but the cleanup steps still need them.
        let bucket = req.input.bucket.clone();
        let key = req.input.key.clone();
        self.enforce_rate_limit(&req, &bucket)?;
        self.enforce_policy(&req, "s3:DeleteObject", &bucket, Some(&key))?;
        // v0.6 #42: MFA Delete enforcement. When the bucket has
        // MFA-Delete = Enabled, every DELETE / DELETE-version /
        // delete-marker form needs `x-amz-mfa: <serial> <code>` (RFC 6238
        // 6-digit TOTP). Runs *before* the WORM / versioning routers so
        // a missing token is denied for free regardless of which delete
        // path the request would otherwise take.
        if let Some(mgr) = self.mfa_delete.as_ref()
            && mgr.is_enabled(&bucket)
        {
            let header = req.input.mfa.as_deref();
            if let Err(e) = crate::mfa::check_mfa(&bucket, header, mgr, current_unix_secs()) {
                crate::metrics::record_mfa_delete_denial(&bucket);
                return Err(mfa_error_to_s3(e));
            }
        }
        // v0.5 #30: refuse the delete while a WORM lock is in effect.
        // Compliance can never be bypassed; Governance can be overridden
        // via `x-amz-bypass-governance-retention: true`; legal hold
        // never. The check happens before the versioning router so a
        // locked object can't be soft-deleted (delete-marker push) on an
        // Enabled bucket either — S3 spec says lock applies to all
        // delete forms.
        if let Some(mgr) = self.object_lock.as_ref()
            && let Some(state) = mgr.get(&bucket, &key)
        {
            let bypass = req.input.bypass_governance_retention.unwrap_or(false);
            let now = chrono::Utc::now();
            if !state.can_delete(now, bypass) {
                crate::metrics::record_policy_denial("s3:DeleteObject", &bucket);
                return Err(S3Error::with_message(
                    S3ErrorCode::AccessDenied,
                    "Access Denied because object protected by object lock",
                ));
            }
        }
        // v0.5 #34: route DELETE through the VersioningManager when the
        // bucket is in a versioning-aware state.
        //
        // - Enabled bucket, no version_id → push a delete marker into
        //   the chain. NO backend object is touched (older versions
        //   stay reachable via specific-version GET).
        // - Enabled / Suspended bucket, with version_id → physical
        //   delete. Backend bytes at the shadow key (or `<key>` for
        //   `null`) are removed; chain entry is dropped. If the deleted
        //   entry was a delete marker, no backend bytes exist for it
        //   (record-only).
        // - Suspended bucket, no version_id → push a "null" delete
        //   marker (S3 spec); backend bytes at `<key>` are physically
        //   removed (same as legacy).
        // - Unversioned bucket → fall through to legacy passthrough.
        if let Some(mgr) = self.versioning.as_ref() {
            let state = mgr.state(&bucket);
            if state != crate::versioning::VersioningState::Unversioned {
                // take() so the version-id never leaks to the backend
                // request on any fall-through path.
                let req_vid = req.input.version_id.take();
                if let Some(vid) = req_vid {
                    // Specific-version DELETE: touch backend bytes only
                    // when the entry was a real version (not a delete
                    // marker, which has no backend bytes).
                    let outcome = mgr.record_delete_specific(&bucket, &key, &vid);
                    let backend_target = if vid == crate::versioning::NULL_VERSION_ID {
                        key.clone()
                    } else {
                        versioned_shadow_key(&key, &vid)
                    };
                    let was_real_version = outcome
                        .as_ref()
                        .map(|o| !o.is_delete_marker)
                        .unwrap_or(false);
                    if was_real_version {
                        // Best-effort backend cleanup; missing bytes
                        // are not an error (e.g. shadow key already
                        // GC'd).
                        let backend_input = DeleteObjectInput {
                            bucket: bucket.clone(),
                            key: backend_target,
                            ..Default::default()
                        };
                        let backend_req = S3Request {
                            input: backend_input,
                            method: http::Method::DELETE,
                            uri: req.uri.clone(),
                            headers: req.headers.clone(),
                            extensions: http::Extensions::new(),
                            credentials: req.credentials.clone(),
                            region: req.region.clone(),
                            service: req.service.clone(),
                            trailing_headers: None,
                        };
                        let _ = self.backend.delete_object(backend_req).await;
                    }
                    let mut output = DeleteObjectOutput {
                        version_id: Some(vid.clone()),
                        ..Default::default()
                    };
                    if let Some(o) = outcome.as_ref()
                        && o.is_delete_marker
                    {
                        output.delete_marker = Some(true);
                    }
                    // v0.6 #35: specific-version DELETE always counts as
                    // a hard `ObjectRemoved:Delete` event (the chain
                    // entry, marker or not, is gone after this call).
                    self.fire_delete_notification(
                        &bucket,
                        &key,
                        crate::notifications::EventType::ObjectRemovedDelete,
                        Some(vid.clone()),
                    );
                    return Ok(S3Response::new(output));
                }
                // No version_id: record a delete marker (state-aware).
                let outcome = mgr.record_delete(&bucket, &key);
                if state == crate::versioning::VersioningState::Suspended {
                    // Suspended buckets also evict the prior `<key>`
                    // bytes (the previous null version is gone too).
                    let backend_input = DeleteObjectInput {
                        bucket: bucket.clone(),
                        key: key.clone(),
                        ..Default::default()
                    };
                    let backend_req = S3Request {
                        input: backend_input,
                        method: http::Method::DELETE,
                        uri: req.uri.clone(),
                        headers: req.headers.clone(),
                        extensions: http::Extensions::new(),
                        credentials: req.credentials.clone(),
                        region: req.region.clone(),
                        service: req.service.clone(),
                        trailing_headers: None,
                    };
                    let _ = self.backend.delete_object(backend_req).await;
                }
                let output = DeleteObjectOutput {
                    delete_marker: Some(true),
                    version_id: outcome.version_id.clone(),
                    ..Default::default()
                };
                // v0.6 #35: versioned bucket DELETE without a version-id
                // creates a delete marker — the dedicated AWS event
                // taxonomy entry. Suspended-state buckets also push a
                // (null) marker, so the same event fires there.
                self.fire_delete_notification(
                    &bucket,
                    &key,
                    crate::notifications::EventType::ObjectRemovedDeleteMarker,
                    outcome.version_id,
                );
                return Ok(S3Response::new(output));
            }
        }
        // Legacy / Unversioned path: physical delete on the backend +
        // best-effort sidecar cleanup (mirrors v0.4 behaviour).
        let resp = self.backend.delete_object(req).await?;
        // v0.5 #30: drop any per-object lock state once the delete has
        // succeeded so the freed key can be re-armed by a future PUT
        // under the bucket default. Reaching here implies the lock had
        // already passed `can_delete` above, so this is purely cleanup.
        if let Some(mgr) = self.object_lock.as_ref() {
            mgr.clear(&bucket, &key);
        }
        // v0.6 #39: drop any object-level tag set on physical delete —
        // the freed key starts a fresh tag history if a future PUT
        // re-creates it. (Versioned-delete branches above return early
        // and do NOT touch tags, mirroring AWS where tag state is
        // attached to the logical key, not the version chain.)
        if let Some(mgr) = self.tagging.as_ref() {
            mgr.delete_object_tags(&bucket, &key);
        }
        let sidecar = sidecar_key(&key);
        // v0.7 #49: skip the sidecar DELETE if the key + sidecar suffix
        // can't be encoded into a request URI — the primary delete
        // already succeeded and a stale sidecar is harmless (Range GET
        // re-validates the underlying object on next read).
        if let Ok(uri) = safe_object_uri(&bucket, &sidecar) {
            let sidecar_input = DeleteObjectInput {
                bucket: bucket.clone(),
                key: sidecar,
                ..Default::default()
            };
            let sidecar_req = S3Request {
                input: sidecar_input,
                method: http::Method::DELETE,
                uri,
                headers: http::HeaderMap::new(),
                extensions: http::Extensions::new(),
                credentials: None,
                region: None,
                service: None,
                trailing_headers: None,
            };
            let _ = self.backend.delete_object(sidecar_req).await;
        }
        // v0.6 #35: legacy unversioned-bucket hard delete fires the
        // canonical `ObjectRemoved:Delete` event.
        self.fire_delete_notification(
            &bucket,
            &key,
            crate::notifications::EventType::ObjectRemovedDelete,
            None,
        );
        Ok(resp)
    }
3513    async fn delete_objects(
3514        &self,
3515        req: S3Request<DeleteObjectsInput>,
3516    ) -> S3Result<S3Response<DeleteObjectsOutput>> {
3517        // v0.6 #42: MFA Delete applies once to the whole batch (S3 spec:
3518        // when MFA-Delete is on the bucket, a missing / invalid token
3519        // fails the entire DeleteObjects request, not per-object).
3520        if let Some(mgr) = self.mfa_delete.as_ref()
3521            && mgr.is_enabled(&req.input.bucket)
3522        {
3523            let header = req.input.mfa.as_deref();
3524            if let Err(e) =
3525                crate::mfa::check_mfa(&req.input.bucket, header, mgr, current_unix_secs())
3526            {
3527                crate::metrics::record_mfa_delete_denial(&req.input.bucket);
3528                return Err(mfa_error_to_s3(e));
3529            }
3530        }
3531        self.backend.delete_objects(req).await
3532    }
3533    async fn copy_object(
3534        &self,
3535        mut req: S3Request<CopyObjectInput>,
3536    ) -> S3Result<S3Response<CopyObjectOutput>> {
3537        // copy is conceptually "GetObject src + PutObject dst" — enforce both.
3538        let dst_bucket = req.input.bucket.clone();
3539        let dst_key = req.input.key.clone();
3540        self.enforce_policy(&req, "s3:PutObject", &dst_bucket, Some(&dst_key))?;
3541        if let CopySource::Bucket { bucket, key, .. } = &req.input.copy_source {
3542            self.enforce_policy(&req, "s3:GetObject", bucket, Some(key))?;
3543        }
3544        // S4-aware copy: source object に s4-* metadata がある場合、それを
3545        // destination に確実に preserve する。
3546        //
3547        // - MetadataDirective::COPY (default): backend が source metadata を
3548        //   そのまま copy するので S4 metadata も自動で渡る。介入不要
3549        // - MetadataDirective::REPLACE: 客が指定した metadata で source を
3550        //   上書き → s4-* metadata が消えると destination は decompress 不能に
3551        //   なる (silent corruption)。S4 が source metadata を HEAD で取得し、
3552        //   s4-* fields を input.metadata に強制 merge する
3553        let needs_merge = req
3554            .input
3555            .metadata_directive
3556            .as_ref()
3557            .map(|d| d.as_str() == MetadataDirective::REPLACE)
3558            .unwrap_or(false);
3559        if needs_merge && let CopySource::Bucket { bucket, key, .. } = &req.input.copy_source {
3560            let head_input = HeadObjectInput {
3561                bucket: bucket.to_string(),
3562                key: key.to_string(),
3563                ..Default::default()
3564            };
3565            let head_req = S3Request {
3566                input: head_input,
3567                method: req.method.clone(),
3568                uri: req.uri.clone(),
3569                headers: req.headers.clone(),
3570                extensions: http::Extensions::new(),
3571                credentials: req.credentials.clone(),
3572                region: req.region.clone(),
3573                service: req.service.clone(),
3574                trailing_headers: None,
3575            };
3576            if let Ok(head) = self.backend.head_object(head_req).await
3577                && let Some(src_meta) = head.output.metadata.as_ref()
3578            {
3579                let dest_meta = req.input.metadata.get_or_insert_with(Default::default);
3580                for key in [
3581                    META_CODEC,
3582                    META_ORIGINAL_SIZE,
3583                    META_COMPRESSED_SIZE,
3584                    META_CRC32C,
3585                    META_MULTIPART,
3586                    META_FRAMED,
3587                ] {
3588                    if let Some(v) = src_meta.get(key) {
3589                        // 客が同じ key を指定していたら preserve しない (= 上書き許可)
3590                        // していたら何もしない。指定していなければ insert
3591                        dest_meta
3592                            .entry(key.to_string())
3593                            .or_insert_with(|| v.clone());
3594                    }
3595                }
3596                debug!(
3597                    src_bucket = %bucket,
3598                    src_key = %key,
3599                    "S4 copy_object: preserved s4-* metadata across REPLACE directive"
3600                );
3601            }
3602        }
3603        self.backend.copy_object(req).await
3604    }
3605    async fn list_objects(
3606        &self,
3607        req: S3Request<ListObjectsInput>,
3608    ) -> S3Result<S3Response<ListObjectsOutput>> {
3609        self.enforce_rate_limit(&req, &req.input.bucket)?;
3610        self.enforce_policy(&req, "s3:ListBucket", &req.input.bucket, None)?;
3611        let mut resp = self.backend.list_objects(req).await?;
3612        // S4 内部 object (`*.s4index` sidecar、`.__s4ver__/` shadow versions
3613        // — v0.5 #34) を顧客から隠す。
3614        if let Some(contents) = resp.output.contents.as_mut() {
3615            contents.retain(|o| {
3616                o.key
3617                    .as_ref()
3618                    .map(|k| !k.ends_with(".s4index") && !is_versioning_shadow_key(k))
3619                    .unwrap_or(true)
3620            });
3621        }
3622        Ok(resp)
3623    }
3624    async fn list_objects_v2(
3625        &self,
3626        req: S3Request<ListObjectsV2Input>,
3627    ) -> S3Result<S3Response<ListObjectsV2Output>> {
3628        self.enforce_rate_limit(&req, &req.input.bucket)?;
3629        self.enforce_policy(&req, "s3:ListBucket", &req.input.bucket, None)?;
3630        let mut resp = self.backend.list_objects_v2(req).await?;
3631        if let Some(contents) = resp.output.contents.as_mut() {
3632            let before = contents.len();
3633            contents.retain(|o| {
3634                o.key
3635                    .as_ref()
3636                    .map(|k| !k.ends_with(".s4index") && !is_versioning_shadow_key(k))
3637                    .unwrap_or(true)
3638            });
3639            // key_count も補正 (S3 spec compliance)
3640            if let Some(kc) = resp.output.key_count.as_mut() {
3641                *kc -= (before - contents.len()) as i32;
3642            }
3643        }
3644        Ok(resp)
3645    }
3646    /// v0.4 #17: filter S4-internal sidecars from versioned listings.
3647    /// v0.5 #34: when a [`crate::versioning::VersioningManager`] is
3648    /// attached AND the bucket is in a versioning-aware state, build
3649    /// the `Versions` / `DeleteMarkers` arrays directly from the
3650    /// in-memory chain (paginated + ordered the S3 way: key asc,
3651    /// version newest-first inside each key). Otherwise fall back to
3652    /// passthrough + sidecar-filter (legacy v0.4 behaviour).
3653    async fn list_object_versions(
3654        &self,
3655        req: S3Request<ListObjectVersionsInput>,
3656    ) -> S3Result<S3Response<ListObjectVersionsOutput>> {
3657        self.enforce_rate_limit(&req, &req.input.bucket)?;
3658        self.enforce_policy(&req, "s3:ListBucket", &req.input.bucket, None)?;
3659        // v0.5 #34: VersioningManager-owned path.
3660        if let Some(mgr) = self.versioning.as_ref()
3661            && mgr.state(&req.input.bucket) != crate::versioning::VersioningState::Unversioned
3662        {
3663            let max_keys = req.input.max_keys.unwrap_or(1000) as usize;
3664            let page = mgr.list_versions(
3665                &req.input.bucket,
3666                req.input.prefix.as_deref(),
3667                req.input.key_marker.as_deref(),
3668                req.input.version_id_marker.as_deref(),
3669                max_keys,
3670            );
3671            let versions: Vec<ObjectVersion> = page
3672                .versions
3673                .into_iter()
3674                .map(|e| ObjectVersion {
3675                    key: Some(e.key),
3676                    version_id: Some(e.version_id),
3677                    is_latest: Some(e.is_latest),
3678                    e_tag: Some(ETag::Strong(e.etag)),
3679                    size: Some(e.size as i64),
3680                    last_modified: Some(std::time::SystemTime::from(e.last_modified).into()),
3681                    ..Default::default()
3682                })
3683                .collect();
3684            let delete_markers: Vec<DeleteMarkerEntry> = page
3685                .delete_markers
3686                .into_iter()
3687                .map(|e| DeleteMarkerEntry {
3688                    key: Some(e.key),
3689                    version_id: Some(e.version_id),
3690                    is_latest: Some(e.is_latest),
3691                    last_modified: Some(std::time::SystemTime::from(e.last_modified).into()),
3692                    ..Default::default()
3693                })
3694                .collect();
3695            let output = ListObjectVersionsOutput {
3696                name: Some(req.input.bucket.clone()),
3697                prefix: req.input.prefix.clone(),
3698                key_marker: req.input.key_marker.clone(),
3699                version_id_marker: req.input.version_id_marker.clone(),
3700                max_keys: req.input.max_keys,
3701                versions: if versions.is_empty() {
3702                    None
3703                } else {
3704                    Some(versions)
3705                },
3706                delete_markers: if delete_markers.is_empty() {
3707                    None
3708                } else {
3709                    Some(delete_markers)
3710                },
3711                is_truncated: Some(page.is_truncated),
3712                next_key_marker: page.next_key_marker,
3713                next_version_id_marker: page.next_version_id_marker,
3714                ..Default::default()
3715            };
3716            return Ok(S3Response::new(output));
3717        }
3718        // Legacy passthrough path (v0.4 #17 sidecar filter retained).
3719        let mut resp = self.backend.list_object_versions(req).await?;
3720        if let Some(versions) = resp.output.versions.as_mut() {
3721            versions.retain(|v| {
3722                v.key
3723                    .as_ref()
3724                    .map(|k| !k.ends_with(".s4index") && !is_versioning_shadow_key(k))
3725                    .unwrap_or(true)
3726            });
3727        }
3728        if let Some(markers) = resp.output.delete_markers.as_mut() {
3729            markers.retain(|m| {
3730                m.key
3731                    .as_ref()
3732                    .map(|k| !k.ends_with(".s4index") && !is_versioning_shadow_key(k))
3733                    .unwrap_or(true)
3734            });
3735        }
3736        Ok(resp)
3737    }
3738
3739    async fn create_multipart_upload(
3740        &self,
3741        mut req: S3Request<CreateMultipartUploadInput>,
3742    ) -> S3Result<S3Response<CreateMultipartUploadOutput>> {
3743        // Multipart object は per-part 圧縮 + frame 形式で書く。GET 時に
3744        // frame parse を起動するため、object metadata に flag を立てる。
3745        // codec は dispatcher の default kind を採用 (per-part 別 codec は Phase 2)。
3746        let codec_kind = self.registry.default_kind();
3747        let meta = req.input.metadata.get_or_insert_with(Default::default);
3748        meta.insert(META_MULTIPART.into(), "true".into());
3749        meta.insert(META_CODEC.into(), codec_kind.as_str().into());
3750        // v0.8 #54 BUG-10 fix: take() the SSE request fields off
3751        // `req.input` so they are NOT forwarded to the backend on
3752        // CreateMultipartUpload. Same root cause as v0.7 #48 BUG-2/3 on
3753        // single-PUT — MinIO rejects SSE-C with "HTTPS required" and
3754        // SSE-KMS with "KMS not configured" when the headers reach it.
3755        // S4 owns the encrypt-then-store contract; we capture the
3756        // recipe in `multipart_state` here and apply it on Complete.
3757        let sse_c_alg = req.input.sse_customer_algorithm.take();
3758        let sse_c_key = req.input.sse_customer_key.take();
3759        let sse_c_md5 = req.input.sse_customer_key_md5.take();
3760        let sse_header = req.input.server_side_encryption.take();
3761        let sse_kms_key = req.input.ssekms_key_id.take();
3762        // Strip the encryption-context too — leaving it would make
3763        // MinIO try to validate it against a non-existent KMS key.
3764        let _ = req.input.ssekms_encryption_context.take();
3765        let sse_c_material = extract_sse_c_material(&sse_c_alg, &sse_c_key, &sse_c_md5)?;
3766        let kms_key_id = extract_kms_key_id(
3767            &sse_header,
3768            &sse_kms_key,
3769            self.kms_default_key_id.as_deref(),
3770        );
3771        // SSE-C / SSE-KMS exclusivity (mirrors put_object L1870).
3772        if sse_c_material.is_some() && kms_key_id.is_some() {
3773            return Err(S3Error::with_message(
3774                S3ErrorCode::InvalidArgument,
3775                "SSE-C and SSE-KMS cannot be used together on the same multipart upload",
3776            ));
3777        }
3778        let sse_mode = if let Some(ref m) = sse_c_material {
3779            // v0.8.2 #62 (H-6 audit fix): wrap the customer-supplied
3780            // 32-byte key in `Zeroizing` so abandoned uploads (or
3781            // normal Complete/Abort) wipe the key bytes on drop. The
3782            // `key_md5` is the public fingerprint and stays as a
3783            // bare `[u8; 16]`.
3784            crate::multipart_state::MultipartSseMode::SseC {
3785                key: zeroize::Zeroizing::new(m.key),
3786                key_md5: m.key_md5,
3787            }
3788        } else if let Some(ref kid) = kms_key_id {
3789            // KMS pre-flight: fail at Create rather than at Complete if
3790            // the gateway has no KMS backend wired (mirrors the
3791            // put_object L1879 check).
3792            if self.kms.is_none() {
3793                return Err(S3Error::with_message(
3794                    S3ErrorCode::InvalidRequest,
3795                    "SSE-KMS requested but no --kms-local-dir / --kms-aws-region is configured on this gateway",
3796                ));
3797            }
3798            crate::multipart_state::MultipartSseMode::SseKms {
3799                key_id: kid.clone(),
3800            }
3801        } else if self.sse_keyring.is_some() {
3802            // SSE-S4: server-driven transparent encryption. Activates
3803            // whenever the gateway has a keyring configured AND the
3804            // client didn't pick a different SSE mode.
3805            crate::multipart_state::MultipartSseMode::SseS4
3806        } else {
3807            crate::multipart_state::MultipartSseMode::None
3808        };
3809        // v0.8 #54 BUG-9 fix: parse the Tagging header on Create. The
3810        // single-PUT path does this on PutObject; the multipart path
3811        // captures it now and commits via TagManager on Complete.
3812        let request_tags: Option<crate::tagging::TagSet> = req
3813            .input
3814            .tagging
3815            .as_deref()
3816            .map(crate::tagging::parse_tagging_header)
3817            .transpose()
3818            .map_err(|e| S3Error::with_message(S3ErrorCode::InvalidArgument, e.to_string()))?;
3819        // Strip the `Tagging` field off the input so the backend
3820        // doesn't try to apply it (no-op on MinIO but keeps the wire
3821        // clean).
3822        let _ = req.input.tagging.take();
3823        // Object Lock recipe (BUG-7 — captured here, applied on Complete).
3824        let explicit_lock_mode: Option<crate::object_lock::LockMode> = req
3825            .input
3826            .object_lock_mode
3827            .as_ref()
3828            .and_then(|m| crate::object_lock::LockMode::from_aws_str(m.as_str()));
3829        let explicit_retain_until: Option<chrono::DateTime<chrono::Utc>> = req
3830            .input
3831            .object_lock_retain_until_date
3832            .as_ref()
3833            .and_then(timestamp_to_chrono_utc);
3834        let explicit_legal_hold_on: bool = req
3835            .input
3836            .object_lock_legal_hold_status
3837            .as_ref()
3838            .map(|s| s.as_str().eq_ignore_ascii_case("ON"))
3839            .unwrap_or(false);
3840        let bucket = req.input.bucket.clone();
3841        let key = req.input.key.clone();
3842        debug!(
3843            bucket = %bucket,
3844            key = %key,
3845            codec = codec_kind.as_str(),
3846            sse = ?sse_mode,
3847            "S4 create_multipart_upload: marking object for per-part compression"
3848        );
3849        let mut resp = self.backend.create_multipart_upload(req).await?;
3850        // Stash the per-upload context only after the backend handed
3851        // us an upload_id (failed Creates leave nothing in the store).
3852        if let Some(upload_id) = resp.output.upload_id.as_ref() {
3853            self.multipart_state.put(
3854                upload_id,
3855                crate::multipart_state::MultipartUploadContext {
3856                    bucket,
3857                    key,
3858                    sse: sse_mode.clone(),
3859                    tags: request_tags,
3860                    object_lock_mode: explicit_lock_mode,
3861                    object_lock_retain_until: explicit_retain_until,
3862                    object_lock_legal_hold: explicit_legal_hold_on,
3863                },
3864            );
3865        }
3866        // SSE-C / SSE-KMS response echo (mirrors put_object L2036-L2050).
3867        match &sse_mode {
3868            crate::multipart_state::MultipartSseMode::SseC { key_md5, .. } => {
3869                resp.output.sse_customer_algorithm = Some(crate::sse::SSE_C_ALGORITHM.into());
3870                resp.output.sse_customer_key_md5 =
3871                    Some(base64::engine::general_purpose::STANDARD.encode(key_md5));
3872            }
3873            crate::multipart_state::MultipartSseMode::SseKms { key_id } => {
3874                resp.output.server_side_encryption = Some(ServerSideEncryption::from_static(
3875                    ServerSideEncryption::AWS_KMS,
3876                ));
3877                resp.output.ssekms_key_id = Some(key_id.clone());
3878            }
3879            _ => {}
3880        }
3881        Ok(resp)
3882    }
3883
    async fn upload_part(
        &self,
        mut req: S3Request<UploadPartInput>,
    ) -> S3Result<S3Response<UploadPartOutput>> {
        // Compress each part and forward it with a frame header prepended.
        // On GET, `decompress_multipart` walks the frames with a frame iter
        // and decompresses them in order.
        // **per-part codec dispatch**: the dispatcher picks a codec from a
        // sample at the head of each part body, so parquet-style
        // mixed-content multiparts can use the best codec per part
        // (integer-column part → Bitcomp, text-column part → zstd, etc.).
        //
        // v0.8 #54 BUG-5/BUG-10 fix: lookup the per-upload SSE
        // context captured by `create_multipart_upload` and (a) strip
        // any SSE-C request headers off `req.input` so the backend
        // doesn't see them — same root cause as v0.7 #48 BUG-2/3 on
        // single-PUT; MinIO refuses SSE-C parts over HTTP — and (b)
        // observe that an upload context exists for `upload_id`. The
        // actual encrypt happens once at `complete_multipart_upload`
        // time on the assembled body (the per-part-encrypt approach
        // would require a matching multi-segment decrypt path on GET;
        // encrypting the whole assembled body keeps the GET path's
        // `is_sse_encrypted` branch in get_object L2429 working
        // unchanged).
        let sse_ctx = self.multipart_state.get(req.input.upload_id.as_str());
        // v0.8.2 #62 (H-1 audit fix): SSE-C key consistency check.
        // The AWS S3 spec requires the same SSE-C key headers on
        // every UploadPart and rejects mismatches with 400. Prior to
        // #62 we silently stripped the headers (BUG-10 fix) without
        // validating them, allowing a client to send part 1 under
        // key-A and part 2 under key-B; both got stored, then
        // re-encrypted with key-A on Complete — the client thinks
        // part 2 is under key-B but a GET with key-B would in fact
        // hit the part-1 ciphertext that was actually encrypted with
        // key-A. That would either decrypt successfully (silent
        // corruption: client lost track of which key encrypts what)
        // or fail in a confusing way. Validate the per-part headers
        // now and reject with 400 InvalidArgument on mismatch /
        // omission / partial supply, matching real-S3 behaviour.
        if let Some(ref ctx) = sse_ctx {
            if let crate::multipart_state::MultipartSseMode::SseC {
                key_md5: ctx_md5, ..
            } = &ctx.sse
            {
                // `take()` removes the headers from `req.input` in every
                // arm below, so the backend never sees the SSE-C triple
                // regardless of whether validation succeeds.
                let alg = req.input.sse_customer_algorithm.take();
                let key_b64 = req.input.sse_customer_key.take();
                let md5_b64 = req.input.sse_customer_key_md5.take();
                match (alg, key_b64, md5_b64) {
                    (Some(a), Some(k), Some(m)) => {
                        // Parse + validate; if the per-part headers
                        // are themselves malformed (algorithm not
                        // AES256, MD5 mismatch, key not 32 bytes)
                        // surface the same 400 the single-PUT path
                        // would. Then compare the parsed MD5 to the
                        // upload-context's MD5; mismatch is a
                        // different-key UploadPart and must reject.
                        let part_material = crate::sse::parse_customer_key_headers(&a, &k, &m)
                            .map_err(sse_c_error_to_s3)?;
                        if part_material.key_md5 != *ctx_md5 {
                            return Err(S3Error::with_message(
                                S3ErrorCode::InvalidArgument,
                                "SSE-C key on UploadPart does not match the key supplied on CreateMultipartUpload",
                            ));
                        }
                        // OK — same key as Create. Headers are
                        // already taken off `req.input` so the
                        // backend never sees them.
                    }
                    (None, None, None) => {
                        // AWS S3 spec: SSE-C headers MUST be replayed
                        // on every UploadPart of an SSE-C multipart.
                        // Real-S3 returns 400 InvalidRequest in this
                        // case; mirror that.
                        return Err(S3Error::with_message(
                            S3ErrorCode::InvalidRequest,
                            "SSE-C requires customer-key headers on every UploadPart (CreateMultipartUpload was SSE-C)",
                        ));
                    }
                    _ => {
                        // Partial header set (e.g. algorithm + key
                        // but no MD5) — same handling as the
                        // single-PUT `extract_sse_c_material` helper.
                        return Err(S3Error::with_message(
                            S3ErrorCode::InvalidRequest,
                            "SSE-C requires all three of: x-amz-server-side-encryption-customer-{algorithm,key,key-MD5}",
                        ));
                    }
                }
            } else {
                // CreateMultipartUpload was non-SSE-C (None / SseS4 /
                // SseKms). A part that arrives carrying SSE-C headers
                // is either a confused client or an attempt to
                // smuggle SSE-C around the gateway-internal SSE
                // recipe. Reject with 400 InvalidRequest rather than
                // silently strip — the strip would let the client
                // believe the part was encrypted under their key
                // when in fact the upload's encryption recipe is
                // whatever the Create captured.
                if req.input.sse_customer_algorithm.is_some()
                    || req.input.sse_customer_key.is_some()
                    || req.input.sse_customer_key_md5.is_some()
                {
                    return Err(S3Error::with_message(
                        S3ErrorCode::InvalidRequest,
                        "UploadPart sent SSE-C headers but CreateMultipartUpload was not SSE-C",
                    ));
                }
            }
        } else {
            // No upload context registered (gateway crashed between
            // Create and Part, or pre-#62 abandoned-upload restore).
            // We can't check key consistency in this case — strip
            // the headers and let the request through unchanged so
            // the backend's `NoSuchUpload` reply (or whatever it
            // chooses to do) flows back to the client.
            let _ = req.input.sse_customer_algorithm.take();
            let _ = req.input.sse_customer_key.take();
            let _ = req.input.sse_customer_key_md5.take();
        }
        // The SSE context is not needed past this point: per-part
        // encryption is deliberately NOT performed here (Complete does
        // one encrypt over the assembled body — see the comment block
        // above). The underscore rebind makes that explicit.
        let _sse_ctx = sse_ctx;
        if let Some(blob) = req.input.body.take() {
            // Collect the whole part into memory (bounded by
            // `max_body_bytes`); streaming-aware compression is a
            // known limitation (see module docs).
            let bytes = collect_blob(blob, self.max_body_bytes)
                .await
                .map_err(internal("collect upload_part body"))?;
            // Dispatcher sniffs at most `SAMPLE_BYTES` from the head of
            // the part to pick a codec (parts smaller than the sample
            // window are sampled in full).
            let sample_len = bytes.len().min(SAMPLE_BYTES);
            // v0.8 #56: full part body is already in memory here; use its
            // length as the size hint so the dispatcher can promote to GPU
            // if it's big enough.
            let codec_kind = self
                .dispatcher
                .pick_with_size_hint(&bytes[..sample_len], Some(bytes.len() as u64))
                .await;
            // Record the pre-compression size before `bytes` is moved
            // into the registry; it goes into the frame header so GET
            // can size the decompression buffer.
            let original_size = bytes.len() as u64;
            // v0.8 #55: telemetry-returning compress (GPU metrics stamp).
            let (compress_res, tel) = self
                .registry
                .compress_with_telemetry(bytes, codec_kind)
                .await;
            stamp_gpu_compress_telemetry(&tel);
            let (compressed, manifest) =
                compress_res.map_err(internal("registry compress part"))?;
            // Frame header = codec id + sizes + CRC; consumed by the
            // frame iter on the GET/decompress path.
            let header = FrameHeader {
                codec: codec_kind,
                original_size,
                compressed_size: compressed.len() as u64,
                crc32c: manifest.crc32c,
            };
            let mut framed = BytesMut::with_capacity(FRAME_HEADER_BYTES + compressed.len());
            write_frame(&mut framed, header, &compressed);
            // v0.2 #5: heuristic-based padding skip for likely-final parts.
            //
            // AWS SDK / aws-cli / boto3 always send the final (and only the
            // final) part below the configured part_size. So if the raw user
            // part is already smaller than S3's 5 MiB multipart minimum, this
            // is overwhelmingly likely to be the final part — and the final
            // part is exempt from S3's size constraint. Skipping padding here
            // saves up to ~5 MiB per object on highly compressible workloads.
            //
            // If a misbehaving client sends a tiny **non-final** part, S3
            // itself rejects with EntityTooSmall at CompleteMultipartUpload —
            // identical outcome to a vanilla S3 PUT, just earlier than
            // padding-then-complete would catch it.
            let likely_final = original_size < S3_MULTIPART_MIN_PART_BYTES as u64;
            if !likely_final {
                pad_to_minimum(&mut framed, S3_MULTIPART_MIN_PART_BYTES);
            }
            let framed_bytes = framed.freeze();
            let new_len = framed_bytes.len() as i64;
            // The same wire-compat problem as single PUT exists for
            // multipart too (content-length / checksums): the framed body
            // has a different length and digest than what the client sent,
            // so rewrite the length and drop every client-supplied
            // checksum so the backend doesn't reject the altered payload.
            req.input.content_length = Some(new_len);
            req.input.checksum_algorithm = None;
            req.input.checksum_crc32 = None;
            req.input.checksum_crc32c = None;
            req.input.checksum_crc64nvme = None;
            req.input.checksum_sha1 = None;
            req.input.checksum_sha256 = None;
            req.input.content_md5 = None;
            req.input.body = Some(bytes_to_blob(framed_bytes));
            debug!(
                part_number = ?req.input.part_number,
                upload_id = ?req.input.upload_id,
                original_size,
                framed_size = new_len,
                "S4 upload_part: framed compressed payload"
            );
        }
        // Forward the (possibly rewritten) request; a body-less request
        // passes through untouched apart from the SSE header handling above.
        self.backend.upload_part(req).await
    }
4070    async fn complete_multipart_upload(
4071        &self,
4072        mut req: S3Request<CompleteMultipartUploadInput>,
4073    ) -> S3Result<S3Response<CompleteMultipartUploadOutput>> {
4074        let bucket = req.input.bucket.clone();
4075        let key = req.input.key.clone();
4076        let upload_id = req.input.upload_id.clone();
4077        // v0.8.1 #59: serialise concurrent Complete invocations on the
4078        // same `(bucket, key)`. The race window the lock closes is the
4079        // GET-assembled-body → encrypt → PUT-encrypted-body triple
4080        // below (BUG-5 fix); without serialisation, two Completes for
4081        // different `upload_id` but the same logical key could each
4082        // read the other's plaintext assembled body and overwrite the
4083        // peer's encrypted result. The guard is held to function exit
4084        // (drop on `Ok` / `Err`), covering version-id mint, object-
4085        // lock apply, tagging persist, and replication enqueue too.
4086        let completion_lock = self.multipart_state.completion_lock(&bucket, &key);
4087        let _completion_guard = completion_lock.lock().await;
4088        // v0.8 #54 — fetch the per-upload context captured on Create.
4089        // `None` means an abandoned / unknown upload_id (gateway
4090        // crashed between Create and Complete, or pre-v0.8 state
4091        // restore); we still let the backend do its thing for
4092        // transparency, but we can't apply any SSE / version / lock /
4093        // tag / replication post-processing because we never captured
4094        // the recipe.
4095        let ctx = self.multipart_state.get(upload_id.as_str());
4096        // v0.8 #54 BUG-10 fix: same SSE-C header strip as upload_part
4097        // — some clients (boto3 / aws-sdk-cpp older versions) replay
4098        // the SSE-C triple on Complete too, and MinIO will choke if
4099        // they reach the backend.
4100        let _ = req.input.sse_customer_algorithm.take();
4101        let _ = req.input.sse_customer_key.take();
4102        let _ = req.input.sse_customer_key_md5.take();
4103        let mut resp = self.backend.complete_multipart_upload(req).await?;
4104        // CompleteMultipartUpload 成功 → 完成した object を full fetch して frame
4105        // index を build、`<key>.s4index` sidecar として保存。これで Range GET の
4106        // partial fetch path が利用可能になる (Range request の帯域節約)。
4107        // 注: 巨大 object の場合この pass は重いが、Range query は一度 sidecar が
4108        // できれば爆速になるので 1 回の cost は payback される
4109        //
4110        // v0.8 #54 BUG-5..9: this same fetch is the choke-point for
4111        // the SSE encrypt re-PUT + versioning shadow-key rewrite +
4112        // replication source-bytes capture, so we GET once and reuse
4113        // the bytes for every post-processing step.
4114        let assembled_body: Option<bytes::Bytes> = if let Ok(uri) = safe_object_uri(&bucket, &key) {
4115            let get_input = GetObjectInput {
4116                bucket: bucket.clone(),
4117                key: key.clone(),
4118                ..Default::default()
4119            };
4120            let get_req = S3Request {
4121                input: get_input,
4122                method: http::Method::GET,
4123                uri,
4124                headers: http::HeaderMap::new(),
4125                extensions: http::Extensions::new(),
4126                credentials: None,
4127                region: None,
4128                service: None,
4129                trailing_headers: None,
4130            };
4131            match self.backend.get_object(get_req).await {
4132                Ok(get_resp) => match get_resp.output.body {
4133                    Some(blob) => collect_blob(blob, self.max_body_bytes).await.ok(),
4134                    None => None,
4135                },
4136                Err(e) => {
4137                    // v0.8.4 #71 (C-1 audit fix): a silent
4138                    // `Err(_) => None` here is a SSE plaintext
4139                    // leak. The post-processing block below only
4140                    // runs the SSE re-encrypt branch when
4141                    // `assembled_body.is_some()`, so swallowing a
4142                    // backend error skipped the encrypt step and
4143                    // left the multipart object on disk as
4144                    // plaintext, even on SSE-S4 / SSE-C / SSE-KMS
4145                    // configured buckets. Same root-cause family
4146                    // as v0.8 BUG-5; this branch closes the
4147                    // remaining read-side window.
4148                    //
4149                    // We distinguish two cases:
4150                    //  - `NoSuchKey`: the object is genuinely
4151                    //    missing post-Complete. This is rare and
4152                    //    typically races with a concurrent
4153                    //    DeleteObject; there is nothing to re-
4154                    //    encrypt and no SSE markers to honour, so
4155                    //    falling through to the legacy
4156                    //    `assembled_body = None` path is safe.
4157                    //  - everything else (5xx, network, auth,
4158                    //    etc.): we must FAIL the Complete so the
4159                    //    client can retry. Returning Ok with
4160                    //    `assembled_body = None` would silently
4161                    //    skip the SSE re-encrypt and leave the
4162                    //    backend bytes plaintext.
4163                    if matches!(e.code(), &S3ErrorCode::NoSuchKey) {
4164                        tracing::warn!(
4165                            bucket = %bucket,
4166                            key = %key,
4167                            "multipart Complete: backend GET returned NoSuchKey; \
4168                             skipping post-processing (object likely raced with DeleteObject)"
4169                        );
4170                        None
4171                    } else {
4172                        tracing::error!(
4173                            bucket = %bucket,
4174                            key = %key,
4175                            error = %e,
4176                            "multipart Complete: backend GET failed; failing the Complete \
4177                             so the client retries (silent fall-through would skip SSE \
4178                             re-encrypt and store plaintext)"
4179                        );
4180                        return Err(internal("multipart Complete: backend body fetch failed")(e));
4181                    }
4182                }
4183            }
4184        } else {
4185            None
4186        };
4187        // Sidecar build (existing behaviour, gated on assembled body).
4188        if let Some(ref body) = assembled_body
4189            && let Ok(index) = build_index_from_body(body)
4190        {
4191            self.write_sidecar(&bucket, &key, &index).await;
4192        }
4193        // From here on, post-processing depends on the context —
4194        // short-circuit when the upload had no captured recipe
4195        // (legacy / crashed-Create / pre-v0.8 state restore).
4196        if let Some(ctx) = ctx {
4197            // v0.8 #54 BUG-6 fix: mint a version-id when the bucket
4198            // is versioning-Enabled. The single-PUT path does this in
4199            // `put_object` ~L1968; multipart was the missing branch.
4200            // We mint here (post-Complete, before any re-PUT) so the
4201            // same vid threads into both the shadow-key rewrite and
4202            // the VersionEntry the manager records.
4203            let pending_version: Option<crate::versioning::PutOutcome> = self
4204                .versioning
4205                .as_ref()
4206                .map(|mgr| mgr.state(&bucket))
4207                .map(|state| match state {
4208                    crate::versioning::VersioningState::Enabled => crate::versioning::PutOutcome {
4209                        version_id: crate::versioning::VersioningManager::new_version_id(),
4210                        versioned_response: true,
4211                    },
4212                    crate::versioning::VersioningState::Suspended
4213                    | crate::versioning::VersioningState::Unversioned => {
4214                        crate::versioning::PutOutcome {
4215                            version_id: crate::versioning::NULL_VERSION_ID.to_owned(),
4216                            versioned_response: false,
4217                        }
4218                    }
4219                });
4220            // v0.8 #54 BUG-5 fix: encrypt the assembled framed body
4221            // and re-PUT it to the backend so the on-disk bytes are
4222            // SSE-encrypted. The single-PUT path does this body-by-
4223            // body inside `put_object` (L1907-L1942); for multipart,
4224            // encrypt-per-part would require a multi-segment decrypt
4225            // path on GET — we instead do a single encrypt over the
4226            // assembled framed body so the existing GET decrypt
4227            // branch (`is_sse_encrypted` → `decrypt(body, source)` →
4228            // FrameIter) handles it unchanged.
4229            //
4230            // The cost is one extra round-trip per Complete for SSE-
4231            // enabled multipart (already-paid for the sidecar build).
4232            // For single-instance gateways pointing at a co-located
4233            // backend this is negligible; cross-region operators
4234            // would benefit from per-part encrypt + multi-segment
4235            // decrypt as a follow-up.
4236            let needs_re_put = matches!(
4237                ctx.sse,
4238                crate::multipart_state::MultipartSseMode::SseS4
4239                    | crate::multipart_state::MultipartSseMode::SseC { .. }
4240                    | crate::multipart_state::MultipartSseMode::SseKms { .. }
4241            ) || pending_version
4242                .as_ref()
4243                .map(|pv| pv.versioned_response)
4244                .unwrap_or(false);
4245            // Snapshot replication body in advance so we can pass it
4246            // to the spawn helper after the (possibly absent) re-PUT.
4247            let replication_body = assembled_body.clone();
4248            let mut applied_metadata: Option<std::collections::HashMap<String, String>> = None;
4249            if needs_re_put && let Some(body) = assembled_body {
4250                // v0.8.1 #58: same Zeroizing pattern as put_object's
4251                // single-PUT KMS branch — DEK plaintext lives in
4252                // `Zeroizing<[u8; 32]>` for the lifetime of this
4253                // Complete handler, then is wiped on drop.
4254                let kms_wrap: Option<(zeroize::Zeroizing<[u8; 32]>, crate::kms::WrappedDek)> =
4255                    if let crate::multipart_state::MultipartSseMode::SseKms { ref key_id } = ctx.sse
4256                    {
4257                        let kms = self.kms.as_ref().ok_or_else(|| {
4258                        S3Error::with_message(
4259                            S3ErrorCode::InvalidRequest,
4260                            "SSE-KMS requested but no --kms-local-dir / --kms-aws-region is configured on this gateway",
4261                        )
4262                    })?;
4263                        let (dek, wrapped) =
4264                            kms.generate_dek(key_id).await.map_err(kms_error_to_s3)?;
4265                        if dek.len() != 32 {
4266                            return Err(S3Error::with_message(
4267                                S3ErrorCode::InternalError,
4268                                format!(
4269                                    "KMS backend returned a DEK of {} bytes (expected 32)",
4270                                    dek.len()
4271                                ),
4272                            ));
4273                        }
4274                        let mut dek_arr: zeroize::Zeroizing<[u8; 32]> =
4275                            zeroize::Zeroizing::new([0u8; 32]);
4276                        dek_arr.copy_from_slice(&dek);
4277                        // `dek` (Zeroizing<Vec<u8>>) is dropped at scope end.
4278                        Some((dek_arr, wrapped))
4279                    } else {
4280                        None
4281                    };
4282                // Build the new metadata map: re-fetch via HEAD so
4283                // the multipart / codec markers the backend stamped
4284                // on Create flow through unchanged, then layer the
4285                // SSE markers on top.
4286                let head_req = S3Request {
4287                    input: HeadObjectInput {
4288                        bucket: bucket.clone(),
4289                        key: key.clone(),
4290                        ..Default::default()
4291                    },
4292                    method: http::Method::HEAD,
4293                    uri: safe_object_uri(&bucket, &key)?,
4294                    headers: http::HeaderMap::new(),
4295                    extensions: http::Extensions::new(),
4296                    credentials: None,
4297                    region: None,
4298                    service: None,
4299                    trailing_headers: None,
4300                };
4301                let mut new_metadata: std::collections::HashMap<String, String> =
4302                    match self.backend.head_object(head_req).await {
4303                        Ok(h) => h.output.metadata.unwrap_or_default(),
4304                        Err(_) => std::collections::HashMap::new(),
4305                    };
4306                let new_body = match &ctx.sse {
4307                    crate::multipart_state::MultipartSseMode::SseC { key, key_md5 } => {
4308                        new_metadata.insert("s4-encrypted".into(), "aes-256-gcm".into());
4309                        new_metadata.insert("s4-sse-type".into(), "AES256".into());
4310                        new_metadata.insert(
4311                            "s4-sse-c-key-md5".into(),
4312                            base64::engine::general_purpose::STANDARD.encode(key_md5),
4313                        );
4314                        // v0.8.2 #62: `key` is `&Zeroizing<[u8; 32]>`;
4315                        // auto-deref through one explicit binding so
4316                        // `SseSource::CustomerKey` gets the `&[u8; 32]`
4317                        // it expects (mirrors the SSE-KMS DEK shape
4318                        // a few lines down).
4319                        let key_ref: &[u8; 32] = key;
4320                        crate::sse::encrypt_with_source(
4321                            &body,
4322                            crate::sse::SseSource::CustomerKey {
4323                                key: key_ref,
4324                                key_md5,
4325                            },
4326                        )
4327                    }
4328                    crate::multipart_state::MultipartSseMode::SseKms { .. } => {
4329                        let (dek, wrapped) = kms_wrap
4330                            .as_ref()
4331                            .expect("SseKms branch implies kms_wrap is Some");
4332                        new_metadata.insert("s4-encrypted".into(), "aes-256-gcm".into());
4333                        new_metadata.insert("s4-sse-type".into(), "aws:kms".into());
4334                        new_metadata.insert("s4-sse-kms-key-id".into(), wrapped.key_id.clone());
4335                        // v0.8.1 #58: auto-deref from `&Zeroizing<[u8; 32]>`
4336                        // to `&[u8; 32]` (same shape as the put_object
4337                        // single-PUT branch).
4338                        let dek_ref: &[u8; 32] = dek;
4339                        crate::sse::encrypt_with_source(
4340                            &body,
4341                            crate::sse::SseSource::Kms {
4342                                dek: dek_ref,
4343                                wrapped,
4344                            },
4345                        )
4346                    }
4347                    crate::multipart_state::MultipartSseMode::SseS4 => {
4348                        let keyring = self.sse_keyring.as_ref().ok_or_else(|| {
4349                            S3Error::with_message(
4350                                S3ErrorCode::InternalError,
4351                                "SSE-S4 captured at Create but keyring missing at Complete",
4352                            )
4353                        })?;
4354                        new_metadata.insert("s4-encrypted".into(), "aes-256-gcm".into());
4355                        // SSE-S4 deliberately omits `s4-sse-type` so
4356                        // HEAD doesn't falsely advertise AWS-style
4357                        // SSE-S3 (matches the put_object L1929-L1939
4358                        // comment).
4359                        // v0.8 #52: same chunk_size dispatch as the
4360                        // single-PUT branch — multipart Complete
4361                        // re-encrypts the assembled body, so honoring
4362                        // the chunked path here is required to keep
4363                        // GET streaming on multipart-uploaded objects.
4364                        if self.sse_chunk_size > 0 {
4365                            crate::sse::encrypt_v2_chunked(&body, keyring, self.sse_chunk_size)
4366                                .map_err(|e| {
4367                                    S3Error::with_message(
4368                                        S3ErrorCode::InternalError,
4369                                        format!("SSE-S4 chunked encrypt failed at Complete: {e}"),
4370                                    )
4371                                })?
4372                        } else {
4373                            crate::sse::encrypt_v2(&body, keyring)
4374                        }
4375                    }
4376                    crate::multipart_state::MultipartSseMode::None => body.clone(),
4377                };
4378                // v0.8 #54 BUG-6 fix: write the re-PUT under the
4379                // shadow key so the version chain doesn't overwrite
4380                // the previous version on a versioned bucket. The
4381                // original (unshadowed) key was assembled by the
4382                // backend on Complete; we delete it after the shadow
4383                // PUT lands.
4384                let put_target_key = if let Some(pv) = pending_version.as_ref() {
4385                    if pv.versioned_response {
4386                        versioned_shadow_key(&key, &pv.version_id)
4387                    } else {
4388                        key.clone()
4389                    }
4390                } else {
4391                    key.clone()
4392                };
4393                let new_body_len = new_body.len() as i64;
4394                let put_req = S3Request {
4395                    input: PutObjectInput {
4396                        bucket: bucket.clone(),
4397                        key: put_target_key.clone(),
4398                        body: Some(bytes_to_blob(new_body.clone())),
4399                        metadata: Some(new_metadata.clone()),
4400                        content_length: Some(new_body_len),
4401                        ..Default::default()
4402                    },
4403                    method: http::Method::PUT,
4404                    uri: safe_object_uri(&bucket, &put_target_key)?,
4405                    headers: http::HeaderMap::new(),
4406                    extensions: http::Extensions::new(),
4407                    credentials: None,
4408                    region: None,
4409                    service: None,
4410                    trailing_headers: None,
4411                };
4412                self.backend.put_object(put_req).await?;
4413                // If we rewrote the storage key (versioning shadow),
4414                // we must drop the original (unshadowed) Complete-
4415                // assembled bytes so subsequent listings don't see a
4416                // duplicate.
4417                if put_target_key != key {
4418                    let del_req = S3Request {
4419                        input: DeleteObjectInput {
4420                            bucket: bucket.clone(),
4421                            key: key.clone(),
4422                            ..Default::default()
4423                        },
4424                        method: http::Method::DELETE,
4425                        uri: safe_object_uri(&bucket, &key)?,
4426                        headers: http::HeaderMap::new(),
4427                        extensions: http::Extensions::new(),
4428                        credentials: None,
4429                        region: None,
4430                        service: None,
4431                        trailing_headers: None,
4432                    };
4433                    let _ = self.backend.delete_object(del_req).await;
4434                }
4435                applied_metadata = Some(new_metadata);
4436            }
4437            // v0.8 #54 BUG-6 commit: register the new version with
4438            // the VersioningManager so list_object_versions /
4439            // GET ?versionId= see it.
4440            if let (Some(mgr), Some(pv)) = (self.versioning.as_ref(), pending_version.as_ref()) {
4441                let etag = resp
4442                    .output
4443                    .e_tag
4444                    .clone()
4445                    .map(ETag::into_value)
4446                    .unwrap_or_default();
4447                let now = chrono::Utc::now();
4448                mgr.commit_put_with_version(
4449                    &bucket,
4450                    &key,
4451                    crate::versioning::VersionEntry {
4452                        version_id: pv.version_id.clone(),
4453                        etag,
4454                        size: replication_body
4455                            .as_ref()
4456                            .map(|b| b.len() as u64)
4457                            .unwrap_or(0),
4458                        is_delete_marker: false,
4459                        created_at: now,
4460                    },
4461                );
4462                if pv.versioned_response {
4463                    resp.output.version_id = Some(pv.version_id.clone());
4464                }
4465            }
4466            // v0.8 #54 BUG-7 fix: persist any per-upload Object Lock
4467            // recipe + auto-apply the bucket default. Mirrors the
4468            // put_object L2057-L2074 block.
4469            if let Some(mgr) = self.object_lock.as_ref() {
4470                if ctx.object_lock_mode.is_some()
4471                    || ctx.object_lock_retain_until.is_some()
4472                    || ctx.object_lock_legal_hold
4473                {
4474                    let mut state = mgr.get(&bucket, &key).unwrap_or_default();
4475                    if let Some(m) = ctx.object_lock_mode {
4476                        state.mode = Some(m);
4477                    }
4478                    if let Some(u) = ctx.object_lock_retain_until {
4479                        state.retain_until = Some(u);
4480                    }
4481                    if ctx.object_lock_legal_hold {
4482                        state.legal_hold_on = true;
4483                    }
4484                    mgr.set(&bucket, &key, state);
4485                }
4486                mgr.apply_default_on_put(&bucket, &key, chrono::Utc::now());
4487            }
4488            // v0.8 #54 BUG-9 fix: persist the captured tags via the
4489            // TagManager so GetObjectTagging returns them.
4490            if let (Some(mgr), Some(tags)) = (self.tagging.as_ref(), ctx.tags.as_ref()) {
4491                mgr.put_object_tags(&bucket, &key, tags.clone());
4492            }
4493            // SSE-C / SSE-KMS response echo. The
4494            // CompleteMultipartUploadOutput only exposes
4495            // `server_side_encryption` + `ssekms_key_id` (no
4496            // sse_customer_* — those round-tripped on Create / parts).
4497            match &ctx.sse {
4498                crate::multipart_state::MultipartSseMode::SseC { .. } => {
4499                    resp.output.server_side_encryption = Some(ServerSideEncryption::from_static(
4500                        ServerSideEncryption::AES256,
4501                    ));
4502                }
4503                crate::multipart_state::MultipartSseMode::SseKms { key_id } => {
4504                    resp.output.server_side_encryption = Some(ServerSideEncryption::from_static(
4505                        ServerSideEncryption::AWS_KMS,
4506                    ));
4507                    resp.output.ssekms_key_id = Some(key_id.clone());
4508                }
4509                _ => {}
4510            }
4511            // v0.8 #54 BUG-8 fix: fire cross-bucket replication just
4512            // like put_object L2165 does. We hand the dispatcher the
4513            // assembled body bytes (post-encrypt where applicable, so
4514            // the destination ends up byte-identical to the source's
4515            // on-disk shape) plus the metadata that was actually
4516            // committed.
4517            let replication_body_bytes = replication_body.unwrap_or_default();
4518            // v0.8.2 #61: thread the multipart-Complete `pending_version`
4519            // through so a versioning-Enabled source's destination
4520            // receives the same shadow-key path (mirror of the
4521            // single-PUT branch above).
4522            self.spawn_replication_if_matched(
4523                &bucket,
4524                &key,
4525                &ctx.tags,
4526                &replication_body_bytes,
4527                &applied_metadata,
4528                true,
4529                pending_version.as_ref(),
4530            );
4531            self.multipart_state.remove(upload_id.as_str());
4532        }
4533        // v0.8.1 #59 janitor: best-effort sweep of stale completion
4534        // locks while we are still on the critical path of a single
4535        // Complete (so steady-state workloads of unique keys don't
4536        // accumulate `DashMap` entries). The sweep only retires
4537        // entries whose `Arc::strong_count == 1`, so any other in-
4538        // flight Complete on a different key keeps its lock alive.
4539        // Our own `_completion_guard` keeps `bucket`/`key`'s entry
4540        // alive across this call; it's reaped on the next Complete or
4541        // the next caller-driven prune.
4542        self.multipart_state.prune_completion_locks();
4543        Ok(resp)
4544    }
4545    async fn abort_multipart_upload(
4546        &self,
4547        req: S3Request<AbortMultipartUploadInput>,
4548    ) -> S3Result<S3Response<AbortMultipartUploadOutput>> {
4549        // v0.8 #54: drop the per-upload state (SSE-C key bytes / tag
4550        // set) promptly so an aborted upload doesn't leak the
4551        // customer's key into a long-running gateway's RSS.
4552        //
4553        // v0.8.4 #71 (H-7 audit fix): backend.abort_multipart_upload
4554        // FIRST, then drop in-process state ONLY on success. The
4555        // previous order ("remove → call backend") meant a transient
4556        // backend abort failure (5xx, network) wiped the SSE-C key
4557        // bytes locally while leaving the parts on the backend, so a
4558        // client retry would have to re-validate the SSE-C key against
4559        // a context the gateway no longer has — and the retried abort
4560        // would still hit the unaborted backend parts. Calling the
4561        // backend first lets the failure propagate to the client with
4562        // state intact for a clean retry; only on success do we wipe
4563        // the local state.
4564        let upload_id = req.input.upload_id.as_str().to_owned();
4565        let resp = self.backend.abort_multipart_upload(req).await?;
4566        self.multipart_state.remove(&upload_id);
4567        Ok(resp)
4568    }
    /// Pure delegation: forward `ListMultipartUploads` to the backend
    /// unchanged. S4 attaches no compression hook to this operation, so
    /// its behavior is identical to the backend's (see the Phase 1
    /// coverage list in the module docs).
    async fn list_multipart_uploads(
        &self,
        req: S3Request<ListMultipartUploadsInput>,
    ) -> S3Result<S3Response<ListMultipartUploadsOutput>> {
        self.backend.list_multipart_uploads(req).await
    }
    /// Pure delegation: forward `ListParts` to the backend unchanged.
    /// Parts were stored by the backend as-is (possibly S4-framed bytes),
    /// and this handler reports them without transformation.
    async fn list_parts(
        &self,
        req: S3Request<ListPartsInput>,
    ) -> S3Result<S3Response<ListPartsOutput>> {
        self.backend.list_parts(req).await
    }
4581
4582    // =========================================================================
4583    // Phase 2 — pure passthrough delegations。S4 はこれらに対して圧縮 hook を
4584    // 持たないので、backend (= AWS S3) の動作と完全に同一。
4585    //
4586    // 既知の制限事項:
4587    // - copy_object / upload_part_copy: source object が S4-compressed の場合、
4588    //   backend が bytes を copy するだけなので metadata (s4-codec etc) も一緒に
    //   copied される (AWS S3 default = MetadataDirective COPY)。GET は manifest
4590    //   経由で正しく decompress できる。MetadataDirective REPLACE で上書き
4591    //   されると圧縮 metadata が消えて壊れる — 顧客側の運用で注意
4592    // - list_object_versions: versioning enabled bucket では各 version も S4
4593    //   metadata を維持する。古い version も S4 経由で正しく GET できる。
4594    // =========================================================================
4595
4596    // ---- Object ACL / tagging / attributes ----
    /// Pure passthrough: forward `GetObjectAcl` to the backend unchanged.
    /// No S4-local ACL state is consulted; the backend is authoritative.
    async fn get_object_acl(
        &self,
        req: S3Request<GetObjectAclInput>,
    ) -> S3Result<S3Response<GetObjectAclOutput>> {
        self.backend.get_object_acl(req).await
    }
    /// Pure passthrough: forward `PutObjectAcl` to the backend unchanged.
    /// No S4-local ACL state is kept; the backend is authoritative.
    async fn put_object_acl(
        &self,
        req: S3Request<PutObjectAclInput>,
    ) -> S3Result<S3Response<PutObjectAclOutput>> {
        self.backend.put_object_acl(req).await
    }
4609    // v0.6 #39: object tagging — when a `TagManager` is attached the
4610    // configuration / per-(bucket, key) state lives in the manager and
4611    // these handlers serve directly from it; when no manager is
4612    // attached they fall back to the backend (legacy passthrough so
4613    // v0.5 deployments are unaffected).
4614    async fn get_object_tagging(
4615        &self,
4616        req: S3Request<GetObjectTaggingInput>,
4617    ) -> S3Result<S3Response<GetObjectTaggingOutput>> {
4618        let Some(mgr) = self.tagging.as_ref() else {
4619            return self.backend.get_object_tagging(req).await;
4620        };
4621        let tags = mgr
4622            .get_object_tags(&req.input.bucket, &req.input.key)
4623            .unwrap_or_default();
4624        Ok(S3Response::new(GetObjectTaggingOutput {
4625            tag_set: tagset_to_aws(&tags),
4626            ..Default::default()
4627        }))
4628    }
4629    async fn put_object_tagging(
4630        &self,
4631        req: S3Request<PutObjectTaggingInput>,
4632    ) -> S3Result<S3Response<PutObjectTaggingOutput>> {
4633        let Some(mgr) = self.tagging.as_ref() else {
4634            return self.backend.put_object_tagging(req).await;
4635        };
4636        let bucket = req.input.bucket.clone();
4637        let key = req.input.key.clone();
4638        let parsed = aws_to_tagset(&req.input.tagging.tag_set)
4639            .map_err(|e| S3Error::with_message(S3ErrorCode::InvalidArgument, e.to_string()))?;
4640        // v0.6 #39: gate via IAM policy with both the request tags
4641        // (`s3:RequestObjectTag/<key>`) and any existing tags on the
4642        // target object (`s3:ExistingObjectTag/<key>`).
4643        let existing = mgr.get_object_tags(&bucket, &key);
4644        self.enforce_policy_with_extra(
4645            &req,
4646            "s3:PutObjectTagging",
4647            &bucket,
4648            Some(&key),
4649            Some(&parsed),
4650            existing.as_ref(),
4651        )?;
4652        mgr.put_object_tags(&bucket, &key, parsed);
4653        Ok(S3Response::new(PutObjectTaggingOutput::default()))
4654    }
4655    async fn delete_object_tagging(
4656        &self,
4657        req: S3Request<DeleteObjectTaggingInput>,
4658    ) -> S3Result<S3Response<DeleteObjectTaggingOutput>> {
4659        let Some(mgr) = self.tagging.as_ref() else {
4660            return self.backend.delete_object_tagging(req).await;
4661        };
4662        let bucket = req.input.bucket.clone();
4663        let key = req.input.key.clone();
4664        let existing = mgr.get_object_tags(&bucket, &key);
4665        self.enforce_policy_with_extra(
4666            &req,
4667            "s3:DeleteObjectTagging",
4668            &bucket,
4669            Some(&key),
4670            None,
4671            existing.as_ref(),
4672        )?;
4673        mgr.delete_object_tags(&bucket, &key);
4674        Ok(S3Response::new(DeleteObjectTaggingOutput::default()))
4675    }
    /// Pure passthrough: forward `GetObjectAttributes` to the backend
    /// unchanged (no S4 compression hook for this operation).
    async fn get_object_attributes(
        &self,
        req: S3Request<GetObjectAttributesInput>,
    ) -> S3Result<S3Response<GetObjectAttributesOutput>> {
        self.backend.get_object_attributes(req).await
    }
    /// Pure passthrough: forward `RestoreObject` to the backend unchanged
    /// (no S4 compression hook for this operation).
    async fn restore_object(
        &self,
        req: S3Request<RestoreObjectInput>,
    ) -> S3Result<S3Response<RestoreObjectOutput>> {
        self.backend.restore_object(req).await
    }
    async fn upload_part_copy(
        &self,
        req: S3Request<UploadPartCopyInput>,
    ) -> S3Result<S3Response<UploadPartCopyOutput>> {
        // v0.2 #6: byte-range aware copy when the source is S4-framed.
        //
        // For a framed source (multipart upload OR single-PUT framed-v2),
        // a naive byte-range passthrough would copy compressed bytes that
        // don't align with S4 frame boundaries — silently corrupting the
        // result. Instead we GET the source through S4 (which handles
        // decompression + Range), re-compress + re-frame as a new part,
        // and forward as upload_part. For non-framed sources (S4-untouched
        // raw objects), passthrough is correct and we keep the original
        // (cheaper) code path.
        // v0.8.4 #74: propagate the optional `?versionId=<vid>` from the
        // copy-source header. Without this, a versioned source bucket
        // copy that pins a specific old version would silently fall
        // back to "latest", assembling wrong bytes into the destination
        // multipart object (silent data corruption).
        //
        // NOTE(review): a non-`Bucket` copy source takes the plain
        // passthrough below — presumably such sources can never name an
        // S4-framed object we wrote; confirm against `CopySource`'s
        // variants.
        let CopySource::Bucket {
            bucket: src_bucket,
            key: src_key,
            version_id: src_version_id,
        } = &req.input.copy_source
        else {
            return self.backend.upload_part_copy(req).await;
        };
        // Own the source coordinates so the borrow of `req.input` ends
        // before `req` is moved into the passthrough / backend calls below.
        let src_bucket = src_bucket.to_string();
        let src_key = src_key.to_string();
        let src_version_id: Option<String> = src_version_id.as_deref().map(str::to_owned);

        // Probe metadata to decide whether the source needs S4-aware copy.
        let head_input = HeadObjectInput {
            bucket: src_bucket.clone(),
            key: src_key.clone(),
            version_id: src_version_id.clone(),
            ..Default::default()
        };
        let head_req = S3Request {
            input: head_input,
            method: http::Method::HEAD,
            uri: req.uri.clone(),
            headers: req.headers.clone(),
            extensions: http::Extensions::new(),
            credentials: req.credentials.clone(),
            region: req.region.clone(),
            service: req.service.clone(),
            trailing_headers: None,
        };
        // A failed HEAD is treated as "not framed": we fall back to the
        // plain passthrough and let the backend's own copy attempt surface
        // the real error (e.g. NoSuchKey) to the client.
        let needs_s4_copy = match self.backend.head_object(head_req).await {
            Ok(h) => {
                is_multipart_object(&h.output.metadata) || is_framed_v2_object(&h.output.metadata)
            }
            Err(_) => false,
        };
        if !needs_s4_copy {
            return self.backend.upload_part_copy(req).await;
        }

        // Resolve the optional source byte range to pass to GET. A
        // malformed range is rejected with InvalidRange before any data
        // is fetched.
        let source_range = req
            .input
            .copy_source_range
            .as_ref()
            .map(|r| parse_copy_source_range(r))
            .transpose()
            .map_err(|e| S3Error::with_message(S3ErrorCode::InvalidRange, e))?;

        // GET source via S4 (handles decompression + sidecar partial fetch
        // when range is present). The result is the requested user-visible
        // byte range, fully decompressed. version_id is propagated so
        // pinned-version copies fetch the exact version requested.
        let mut get_input = GetObjectInput {
            bucket: src_bucket.clone(),
            key: src_key.clone(),
            version_id: src_version_id.clone(),
            ..Default::default()
        };
        get_input.range = source_range;
        let get_req = S3Request {
            input: get_input,
            method: http::Method::GET,
            uri: req.uri.clone(),
            headers: req.headers.clone(),
            extensions: http::Extensions::new(),
            credentials: req.credentials.clone(),
            region: req.region.clone(),
            service: req.service.clone(),
            trailing_headers: None,
        };
        // Deliberately `self.get_object`, not `self.backend.get_object`,
        // so the S4 decompression path runs.
        let get_resp = self.get_object(get_req).await?;
        let blob = get_resp.output.body.ok_or_else(|| {
            S3Error::with_message(
                S3ErrorCode::InternalError,
                "upload_part_copy: empty body from source GET",
            )
        })?;
        // The whole decompressed range is collected in memory, bounded by
        // `max_body_bytes` (see the module-level "known limitations").
        let bytes = collect_blob(blob, self.max_body_bytes)
            .await
            .map_err(internal("collect upload_part_copy source body"))?;

        // Compress + frame as a fresh part (mirrors upload_part path).
        let sample_len = bytes.len().min(SAMPLE_BYTES);
        // v0.8 #56: same size-hint promotion as the upload_part path.
        let codec_kind = self
            .dispatcher
            .pick_with_size_hint(&bytes[..sample_len], Some(bytes.len() as u64))
            .await;
        let original_size = bytes.len() as u64;
        // v0.8 #55: telemetry-returning compress (GPU metrics stamp).
        let (compress_res, tel) = self
            .registry
            .compress_with_telemetry(bytes, codec_kind)
            .await;
        stamp_gpu_compress_telemetry(&tel);
        let (compressed, manifest) =
            compress_res.map_err(internal("registry compress upload_part_copy"))?;
        let header = FrameHeader {
            codec: codec_kind,
            original_size,
            compressed_size: compressed.len() as u64,
            crc32c: manifest.crc32c,
        };
        let mut framed = BytesMut::with_capacity(FRAME_HEADER_BYTES + compressed.len());
        write_frame(&mut framed, header, &compressed);
        // A part smaller than the S3 multipart minimum is assumed to be
        // the final part and left unpadded; otherwise pad up so the framed
        // part stays at or above the minimum intermediate-part size.
        let likely_final = original_size < S3_MULTIPART_MIN_PART_BYTES as u64;
        if !likely_final {
            pad_to_minimum(&mut framed, S3_MULTIPART_MIN_PART_BYTES);
        }
        let framed_bytes = framed.freeze();
        let framed_len = framed_bytes.len() as i64;

        // Forward as upload_part to the destination multipart upload.
        let part_input = UploadPartInput {
            bucket: req.input.bucket.clone(),
            key: req.input.key.clone(),
            part_number: req.input.part_number,
            upload_id: req.input.upload_id.clone(),
            body: Some(bytes_to_blob(framed_bytes)),
            content_length: Some(framed_len),
            ..Default::default()
        };
        let part_req = S3Request {
            input: part_input,
            method: http::Method::PUT,
            uri: req.uri.clone(),
            headers: req.headers.clone(),
            extensions: http::Extensions::new(),
            credentials: req.credentials.clone(),
            region: req.region.clone(),
            service: req.service.clone(),
            trailing_headers: None,
        };
        let upload_resp = self.backend.upload_part(part_req).await?;

        // Echo the freshly-uploaded part's ETag in the CopyPartResult,
        // which is what an UploadPartCopy response carries.
        let copy_output = UploadPartCopyOutput {
            copy_part_result: Some(CopyPartResult {
                e_tag: upload_resp.output.e_tag.clone(),
                ..Default::default()
            }),
            ..Default::default()
        };
        Ok(S3Response::new(copy_output))
    }
4852
4853    // ---- Object lock / retention / legal hold (v0.5 #30) ----
4854    //
4855    // When an `ObjectLockManager` is attached the configuration / per-object
4856    // state lives in the manager and these handlers serve directly from it;
4857    // when no manager is attached they fall back to the backend (legacy
4858    // passthrough so v0.4 deployments are unaffected).
4859    async fn get_object_lock_configuration(
4860        &self,
4861        req: S3Request<GetObjectLockConfigurationInput>,
4862    ) -> S3Result<S3Response<GetObjectLockConfigurationOutput>> {
4863        if let Some(mgr) = self.object_lock.as_ref() {
4864            let cfg = mgr
4865                .bucket_default(&req.input.bucket)
4866                .map(|d| ObjectLockConfiguration {
4867                    object_lock_enabled: Some(ObjectLockEnabled::from_static(
4868                        ObjectLockEnabled::ENABLED,
4869                    )),
4870                    rule: Some(ObjectLockRule {
4871                        default_retention: Some(DefaultRetention {
4872                            days: Some(d.retention_days as i32),
4873                            mode: Some(ObjectLockRetentionMode::from_static(match d.mode {
4874                                crate::object_lock::LockMode::Governance => {
4875                                    ObjectLockRetentionMode::GOVERNANCE
4876                                }
4877                                crate::object_lock::LockMode::Compliance => {
4878                                    ObjectLockRetentionMode::COMPLIANCE
4879                                }
4880                            })),
4881                            years: None,
4882                        }),
4883                    }),
4884                });
4885            let output = GetObjectLockConfigurationOutput {
4886                object_lock_configuration: cfg,
4887            };
4888            return Ok(S3Response::new(output));
4889        }
4890        self.backend.get_object_lock_configuration(req).await
4891    }
4892    async fn put_object_lock_configuration(
4893        &self,
4894        req: S3Request<PutObjectLockConfigurationInput>,
4895    ) -> S3Result<S3Response<PutObjectLockConfigurationOutput>> {
4896        if let Some(mgr) = self.object_lock.as_ref() {
4897            let bucket = req.input.bucket.clone();
4898            if let Some(cfg) = req.input.object_lock_configuration.as_ref()
4899                && let Some(rule) = cfg.rule.as_ref()
4900                && let Some(d) = rule.default_retention.as_ref()
4901            {
4902                let mode = d
4903                    .mode
4904                    .as_ref()
4905                    .and_then(|m| crate::object_lock::LockMode::from_aws_str(m.as_str()))
4906                    .ok_or_else(|| {
4907                        S3Error::with_message(
4908                            S3ErrorCode::InvalidRequest,
4909                            "Object Lock default retention requires a valid Mode (GOVERNANCE | COMPLIANCE)",
4910                        )
4911                    })?;
4912                // S3 spec: exactly one of Days / Years (we accept Days
4913                // outright and convert Years → Days for storage; Years
4914                // is just a UX shorthand on the wire).
4915                let days: u32 = match (d.days, d.years) {
4916                    (Some(d), None) if d > 0 => d as u32,
4917                    (None, Some(y)) if y > 0 => (y as u32).saturating_mul(365),
4918                    _ => {
4919                        return Err(S3Error::with_message(
4920                            S3ErrorCode::InvalidRequest,
4921                            "Object Lock default retention requires exactly one of Days or Years (positive integer)",
4922                        ));
4923                    }
4924                };
4925                mgr.set_bucket_default(
4926                    &bucket,
4927                    crate::object_lock::BucketObjectLockDefault {
4928                        mode,
4929                        retention_days: days,
4930                    },
4931                );
4932            }
4933            return Ok(S3Response::new(PutObjectLockConfigurationOutput::default()));
4934        }
4935        self.backend.put_object_lock_configuration(req).await
4936    }
4937    async fn get_object_legal_hold(
4938        &self,
4939        req: S3Request<GetObjectLegalHoldInput>,
4940    ) -> S3Result<S3Response<GetObjectLegalHoldOutput>> {
4941        if let Some(mgr) = self.object_lock.as_ref() {
4942            let on = mgr
4943                .get(&req.input.bucket, &req.input.key)
4944                .map(|s| s.legal_hold_on)
4945                .unwrap_or(false);
4946            let status = ObjectLockLegalHoldStatus::from_static(if on {
4947                ObjectLockLegalHoldStatus::ON
4948            } else {
4949                ObjectLockLegalHoldStatus::OFF
4950            });
4951            let output = GetObjectLegalHoldOutput {
4952                legal_hold: Some(ObjectLockLegalHold {
4953                    status: Some(status),
4954                }),
4955            };
4956            return Ok(S3Response::new(output));
4957        }
4958        self.backend.get_object_legal_hold(req).await
4959    }
4960    async fn put_object_legal_hold(
4961        &self,
4962        req: S3Request<PutObjectLegalHoldInput>,
4963    ) -> S3Result<S3Response<PutObjectLegalHoldOutput>> {
4964        if let Some(mgr) = self.object_lock.as_ref() {
4965            let on = req
4966                .input
4967                .legal_hold
4968                .as_ref()
4969                .and_then(|h| h.status.as_ref())
4970                .map(|s| s.as_str().eq_ignore_ascii_case("ON"))
4971                .unwrap_or(false);
4972            mgr.set_legal_hold(&req.input.bucket, &req.input.key, on);
4973            return Ok(S3Response::new(PutObjectLegalHoldOutput::default()));
4974        }
4975        self.backend.put_object_legal_hold(req).await
4976    }
4977    async fn get_object_retention(
4978        &self,
4979        req: S3Request<GetObjectRetentionInput>,
4980    ) -> S3Result<S3Response<GetObjectRetentionOutput>> {
4981        if let Some(mgr) = self.object_lock.as_ref() {
4982            let retention = mgr
4983                .get(&req.input.bucket, &req.input.key)
4984                .filter(|s| s.mode.is_some() || s.retain_until.is_some())
4985                .map(|s| {
4986                    let mode = s.mode.map(|m| {
4987                        ObjectLockRetentionMode::from_static(match m {
4988                            crate::object_lock::LockMode::Governance => {
4989                                ObjectLockRetentionMode::GOVERNANCE
4990                            }
4991                            crate::object_lock::LockMode::Compliance => {
4992                                ObjectLockRetentionMode::COMPLIANCE
4993                            }
4994                        })
4995                    });
4996                    let until = s.retain_until.map(chrono_utc_to_timestamp);
4997                    ObjectLockRetention {
4998                        mode,
4999                        retain_until_date: until,
5000                    }
5001                });
5002            let output = GetObjectRetentionOutput { retention };
5003            return Ok(S3Response::new(output));
5004        }
5005        self.backend.get_object_retention(req).await
5006    }
    async fn put_object_retention(
        &self,
        req: S3Request<PutObjectRetentionInput>,
    ) -> S3Result<S3Response<PutObjectRetentionOutput>> {
        // When an ObjectLockManager is attached, retention state is owned by
        // S4 and the backend is never consulted; otherwise this is a plain
        // pass-through (legacy behaviour).
        if let Some(mgr) = self.object_lock.as_ref() {
            let bucket = req.input.bucket.clone();
            let key = req.input.key.clone();
            // x-amz-bypass-governance-retention — only consulted by the
            // Governance-shortening check below.
            let bypass = req.input.bypass_governance_retention.unwrap_or(false);
            // The Retention element is mandatory; reject its absence early.
            let retention = req.input.retention.as_ref().ok_or_else(|| {
                S3Error::with_message(
                    S3ErrorCode::InvalidRequest,
                    "PutObjectRetention requires a Retention element",
                )
            })?;
            // Unrecognised mode strings silently map to None (no mode change).
            let new_mode = retention
                .mode
                .as_ref()
                .and_then(|m| crate::object_lock::LockMode::from_aws_str(m.as_str()));
            // `.map(..).unwrap_or(None)` flattens the Option<Option<_>>:
            // `timestamp_to_chrono_utc` itself returns an Option.
            let new_until = retention
                .retain_until_date
                .as_ref()
                .map(timestamp_to_chrono_utc)
                .unwrap_or(None);
            let now = chrono::Utc::now();
            let existing = mgr.get(&bucket, &key).unwrap_or_default();
            // S3 immutability rules:
            //   - Compliance is one-way: once set, mode cannot move to
            //     Governance, and retain-until cannot be shortened.
            //   - Governance can be lengthened freely; shortened only
            //     with bypass=true.
            if let Some(existing_mode) = existing.mode
                && existing_mode == crate::object_lock::LockMode::Compliance
                && existing.is_locked(now)
            {
                if matches!(new_mode, Some(crate::object_lock::LockMode::Governance)) {
                    return Err(S3Error::with_message(
                        S3ErrorCode::AccessDenied,
                        "Cannot downgrade Compliance retention to Governance while lock is active",
                    ));
                }
                // Shortening is only detectable when both the stored and the
                // requested retain-until dates are present.
                if let (Some(prev), Some(next)) = (existing.retain_until, new_until)
                    && next < prev
                {
                    return Err(S3Error::with_message(
                        S3ErrorCode::AccessDenied,
                        "Cannot shorten Compliance retention while lock is active",
                    ));
                }
            }
            if let Some(existing_mode) = existing.mode
                && existing_mode == crate::object_lock::LockMode::Governance
                && existing.is_locked(now)
                && !bypass
                && let (Some(prev), Some(next)) = (existing.retain_until, new_until)
                && next < prev
            {
                return Err(S3Error::with_message(
                    S3ErrorCode::AccessDenied,
                    "Shortening Governance retention requires x-amz-bypass-governance-retention: true",
                ));
            }
            // Merge semantics: fields are only overwritten when the request
            // actually supplied them — omitting Mode or RetainUntilDate
            // leaves the stored value intact (so retention cannot be cleared
            // through this handler).
            let mut state = existing;
            if new_mode.is_some() {
                state.mode = new_mode;
            }
            if new_until.is_some() {
                state.retain_until = new_until;
            }
            mgr.set(&bucket, &key, state);
            return Ok(S3Response::new(PutObjectRetentionOutput::default()));
        }
        self.backend.put_object_retention(req).await
    }
5080
5081    // ---- Versioning ----
5082    // list_object_versions is implemented above in the compression-hook
5083    // section so it filters S4-internal sidecars (v0.4 #17) AND, when a
5084    // VersioningManager is attached (v0.5 #34), serves chains directly
5085    // from the in-memory index.
5086    async fn get_bucket_versioning(
5087        &self,
5088        req: S3Request<GetBucketVersioningInput>,
5089    ) -> S3Result<S3Response<GetBucketVersioningOutput>> {
5090        // v0.5 #34: when a VersioningManager is attached, the bucket's
5091        // versioning state lives in the manager (= S4-server's
5092        // authoritative source). Pass-through hits the backend only
5093        // when no manager is configured (legacy v0.4 behaviour).
5094        if let Some(mgr) = self.versioning.as_ref() {
5095            let output = match mgr.state(&req.input.bucket).as_aws_status() {
5096                Some(s) => GetBucketVersioningOutput {
5097                    status: Some(BucketVersioningStatus::from(s.to_owned())),
5098                    ..Default::default()
5099                },
5100                None => GetBucketVersioningOutput::default(),
5101            };
5102            return Ok(S3Response::new(output));
5103        }
5104        self.backend.get_bucket_versioning(req).await
5105    }
    async fn put_bucket_versioning(
        &self,
        req: S3Request<PutBucketVersioningInput>,
    ) -> S3Result<S3Response<PutBucketVersioningOutput>> {
        // v0.6 #42: MFA gating on the `PutBucketVersioning` request
        // itself. S3 spec: when the request body carries an
        // `MfaDelete` element (either `Enabled` or `Disabled`), the
        // request must include a valid `x-amz-mfa` token — both for
        // the *first* enable (so the operator can't quietly side-step
        // the gate by never enabling it) and for any subsequent
        // change (so a leaked credential alone can't disable MFA
        // Delete to bypass it on subsequent DELETEs). Requests that
        // omit the `MfaDelete` element entirely (i.e. they flip only
        // `Status`) skip this gate, matching AWS.
        if let Some(mgr) = self.mfa_delete.as_ref()
            && let Some(target_enabled) = req
                .input
                .versioning_configuration
                .mfa_delete
                .as_ref()
                .map(|m| m.as_str().eq_ignore_ascii_case("Enabled"))
        {
            let bucket = req.input.bucket.clone();
            let header = req.input.mfa.as_deref();
            let secret = mgr.lookup_secret(&bucket);
            // Verified only when the header parses, the serial matches the
            // registered device, and the TOTP code is valid at the current
            // time. Any missing piece (no header, no registered secret,
            // parse failure) counts as not verified.
            let verified = match (header, secret.as_ref()) {
                (Some(h), Some(s)) => match crate::mfa::parse_mfa_header(h) {
                    Ok((serial, code)) => {
                        serial == s.serial
                            && crate::mfa::verify_totp(&s.secret_base32, &code, current_unix_secs())
                    }
                    Err(_) => false,
                },
                _ => false,
            };
            if !verified {
                crate::metrics::record_mfa_delete_denial(&bucket);
                // Distinguish "no x-amz-mfa header at all" from "header
                // present but unparseable / wrong code" in the error.
                let err = if header.is_none() {
                    crate::mfa::MfaError::Missing
                } else {
                    crate::mfa::MfaError::InvalidCode
                };
                return Err(mfa_error_to_s3(err));
            }
            // Only a verified request may flip the MfaDelete state.
            mgr.set_bucket_state(&bucket, target_enabled);
        }
        // v0.5 #34: stash the new state in the manager, then forward to
        // the backend so any downstream that *also* tracks state
        // (e.g. a real S3 backend) stays in sync. Manager-attached but
        // backend rejection is treated as a soft-fail (state is still
        // owned by the manager).
        if let Some(mgr) = self.versioning.as_ref() {
            // Anything other than Enabled / Suspended (including an absent
            // Status element) resets the bucket to Unversioned.
            let new_state = match req
                .input
                .versioning_configuration
                .status
                .as_ref()
                .map(|s| s.as_str())
            {
                Some(s) if s.eq_ignore_ascii_case("Enabled") => {
                    crate::versioning::VersioningState::Enabled
                }
                Some(s) if s.eq_ignore_ascii_case("Suspended") => {
                    crate::versioning::VersioningState::Suspended
                }
                _ => crate::versioning::VersioningState::Unversioned,
            };
            mgr.set_state(&req.input.bucket, new_state);
            return Ok(S3Response::new(PutBucketVersioningOutput::default()));
        }
        self.backend.put_bucket_versioning(req).await
    }
5178
    // ---- Bucket location ----
    // Pure delegation: S4 holds no region/location state, so the backend's
    // answer is authoritative.
    async fn get_bucket_location(
        &self,
        req: S3Request<GetBucketLocationInput>,
    ) -> S3Result<S3Response<GetBucketLocationOutput>> {
        self.backend.get_bucket_location(req).await
    }

    // ---- Bucket policy ----
    // Pure delegation: the bucket-policy *documents* are stored by the
    // backend. (S4-side policy *enforcement* is a separate concern —
    // `enforce_policy` is called explicitly by handlers that mutate
    // S4-owned state, e.g. the tagging handlers below.)
    async fn get_bucket_policy(
        &self,
        req: S3Request<GetBucketPolicyInput>,
    ) -> S3Result<S3Response<GetBucketPolicyOutput>> {
        self.backend.get_bucket_policy(req).await
    }
    async fn put_bucket_policy(
        &self,
        req: S3Request<PutBucketPolicyInput>,
    ) -> S3Result<S3Response<PutBucketPolicyOutput>> {
        self.backend.put_bucket_policy(req).await
    }
    async fn delete_bucket_policy(
        &self,
        req: S3Request<DeleteBucketPolicyInput>,
    ) -> S3Result<S3Response<DeleteBucketPolicyOutput>> {
        self.backend.delete_bucket_policy(req).await
    }
    async fn get_bucket_policy_status(
        &self,
        req: S3Request<GetBucketPolicyStatusInput>,
    ) -> S3Result<S3Response<GetBucketPolicyStatusOutput>> {
        self.backend.get_bucket_policy_status(req).await
    }
5212
    // ---- Bucket ACL ----
    // Pure delegation: S4 does not model ACLs (Phase 2 scope per the module
    // docs); both handlers forward to the backend untouched.
    async fn get_bucket_acl(
        &self,
        req: S3Request<GetBucketAclInput>,
    ) -> S3Result<S3Response<GetBucketAclOutput>> {
        self.backend.get_bucket_acl(req).await
    }
    async fn put_bucket_acl(
        &self,
        req: S3Request<PutBucketAclInput>,
    ) -> S3Result<S3Response<PutBucketAclOutput>> {
        self.backend.put_bucket_acl(req).await
    }
5226
5227    // ---- Bucket CORS (v0.6 #38) ----
5228    async fn get_bucket_cors(
5229        &self,
5230        req: S3Request<GetBucketCorsInput>,
5231    ) -> S3Result<S3Response<GetBucketCorsOutput>> {
5232        if let Some(mgr) = self.cors.as_ref() {
5233            let cfg = mgr.get(&req.input.bucket).ok_or_else(|| {
5234                S3Error::with_message(
5235                    S3ErrorCode::NoSuchCORSConfiguration,
5236                    "The CORS configuration does not exist".to_string(),
5237                )
5238            })?;
5239            let rules: Vec<CORSRule> = cfg
5240                .rules
5241                .into_iter()
5242                .map(|r| CORSRule {
5243                    allowed_headers: if r.allowed_headers.is_empty() {
5244                        None
5245                    } else {
5246                        Some(r.allowed_headers)
5247                    },
5248                    allowed_methods: r.allowed_methods,
5249                    allowed_origins: r.allowed_origins,
5250                    expose_headers: if r.expose_headers.is_empty() {
5251                        None
5252                    } else {
5253                        Some(r.expose_headers)
5254                    },
5255                    id: r.id,
5256                    max_age_seconds: r.max_age_seconds.map(|s| s as i32),
5257                })
5258                .collect();
5259            return Ok(S3Response::new(GetBucketCorsOutput {
5260                cors_rules: Some(rules),
5261            }));
5262        }
5263        self.backend.get_bucket_cors(req).await
5264    }
5265    async fn put_bucket_cors(
5266        &self,
5267        req: S3Request<PutBucketCorsInput>,
5268    ) -> S3Result<S3Response<PutBucketCorsOutput>> {
5269        if let Some(mgr) = self.cors.as_ref() {
5270            let cfg = crate::cors::CorsConfig {
5271                rules: req
5272                    .input
5273                    .cors_configuration
5274                    .cors_rules
5275                    .into_iter()
5276                    .map(|r| crate::cors::CorsRule {
5277                        allowed_origins: r.allowed_origins,
5278                        allowed_methods: r.allowed_methods,
5279                        allowed_headers: r.allowed_headers.unwrap_or_default(),
5280                        expose_headers: r.expose_headers.unwrap_or_default(),
5281                        max_age_seconds: r
5282                            .max_age_seconds
5283                            .and_then(|s| if s < 0 { None } else { Some(s as u32) }),
5284                        id: r.id,
5285                    })
5286                    .collect(),
5287            };
5288            mgr.put(&req.input.bucket, cfg);
5289            return Ok(S3Response::new(PutBucketCorsOutput::default()));
5290        }
5291        self.backend.put_bucket_cors(req).await
5292    }
5293    async fn delete_bucket_cors(
5294        &self,
5295        req: S3Request<DeleteBucketCorsInput>,
5296    ) -> S3Result<S3Response<DeleteBucketCorsOutput>> {
5297        if let Some(mgr) = self.cors.as_ref() {
5298            mgr.delete(&req.input.bucket);
5299            return Ok(S3Response::new(DeleteBucketCorsOutput::default()));
5300        }
5301        self.backend.delete_bucket_cors(req).await
5302    }
5303
5304    // ---- Bucket lifecycle (v0.6 #37) ----
5305    async fn get_bucket_lifecycle_configuration(
5306        &self,
5307        req: S3Request<GetBucketLifecycleConfigurationInput>,
5308    ) -> S3Result<S3Response<GetBucketLifecycleConfigurationOutput>> {
5309        if let Some(mgr) = self.lifecycle.as_ref() {
5310            let cfg = mgr.get(&req.input.bucket).ok_or_else(|| {
5311                S3Error::with_message(
5312                    S3ErrorCode::NoSuchLifecycleConfiguration,
5313                    "The lifecycle configuration does not exist".to_string(),
5314                )
5315            })?;
5316            let rules: Vec<LifecycleRule> = cfg.rules.iter().map(internal_rule_to_dto).collect();
5317            return Ok(S3Response::new(GetBucketLifecycleConfigurationOutput {
5318                rules: Some(rules),
5319                transition_default_minimum_object_size: None,
5320            }));
5321        }
5322        self.backend.get_bucket_lifecycle_configuration(req).await
5323    }
5324    async fn put_bucket_lifecycle_configuration(
5325        &self,
5326        req: S3Request<PutBucketLifecycleConfigurationInput>,
5327    ) -> S3Result<S3Response<PutBucketLifecycleConfigurationOutput>> {
5328        if let Some(mgr) = self.lifecycle.as_ref() {
5329            let bucket = req.input.bucket.clone();
5330            let dto_cfg = req.input.lifecycle_configuration.unwrap_or_default();
5331            let cfg = dto_lifecycle_to_internal(&dto_cfg);
5332            mgr.put(&bucket, cfg);
5333            return Ok(S3Response::new(
5334                PutBucketLifecycleConfigurationOutput::default(),
5335            ));
5336        }
5337        self.backend.put_bucket_lifecycle_configuration(req).await
5338    }
5339    async fn delete_bucket_lifecycle(
5340        &self,
5341        req: S3Request<DeleteBucketLifecycleInput>,
5342    ) -> S3Result<S3Response<DeleteBucketLifecycleOutput>> {
5343        if let Some(mgr) = self.lifecycle.as_ref() {
5344            mgr.delete(&req.input.bucket);
5345            return Ok(S3Response::new(DeleteBucketLifecycleOutput::default()));
5346        }
5347        self.backend.delete_bucket_lifecycle(req).await
5348    }
5349
5350    // ---- Bucket tagging (v0.6 #39) ----
5351    async fn get_bucket_tagging(
5352        &self,
5353        req: S3Request<GetBucketTaggingInput>,
5354    ) -> S3Result<S3Response<GetBucketTaggingOutput>> {
5355        let Some(mgr) = self.tagging.as_ref() else {
5356            return self.backend.get_bucket_tagging(req).await;
5357        };
5358        let tags = mgr.get_bucket_tags(&req.input.bucket).unwrap_or_default();
5359        Ok(S3Response::new(GetBucketTaggingOutput {
5360            tag_set: tagset_to_aws(&tags),
5361        }))
5362    }
5363    async fn put_bucket_tagging(
5364        &self,
5365        req: S3Request<PutBucketTaggingInput>,
5366    ) -> S3Result<S3Response<PutBucketTaggingOutput>> {
5367        let Some(mgr) = self.tagging.as_ref() else {
5368            return self.backend.put_bucket_tagging(req).await;
5369        };
5370        let bucket = req.input.bucket.clone();
5371        let parsed = aws_to_tagset(&req.input.tagging.tag_set)
5372            .map_err(|e| S3Error::with_message(S3ErrorCode::InvalidArgument, e.to_string()))?;
5373        self.enforce_policy(&req, "s3:PutBucketTagging", &bucket, None)?;
5374        mgr.put_bucket_tags(&bucket, parsed);
5375        Ok(S3Response::new(PutBucketTaggingOutput::default()))
5376    }
5377    async fn delete_bucket_tagging(
5378        &self,
5379        req: S3Request<DeleteBucketTaggingInput>,
5380    ) -> S3Result<S3Response<DeleteBucketTaggingOutput>> {
5381        let Some(mgr) = self.tagging.as_ref() else {
5382            return self.backend.delete_bucket_tagging(req).await;
5383        };
5384        let bucket = req.input.bucket.clone();
5385        self.enforce_policy(&req, "s3:PutBucketTagging", &bucket, None)?;
5386        mgr.delete_bucket_tags(&bucket);
5387        Ok(S3Response::new(DeleteBucketTaggingOutput::default()))
5388    }
5389
    // ---- Bucket encryption ----
    // Pure delegation: bucket-default encryption configuration is stored by
    // the backend; S4 adds nothing on this path.
    async fn get_bucket_encryption(
        &self,
        req: S3Request<GetBucketEncryptionInput>,
    ) -> S3Result<S3Response<GetBucketEncryptionOutput>> {
        self.backend.get_bucket_encryption(req).await
    }
    async fn put_bucket_encryption(
        &self,
        req: S3Request<PutBucketEncryptionInput>,
    ) -> S3Result<S3Response<PutBucketEncryptionOutput>> {
        self.backend.put_bucket_encryption(req).await
    }
    async fn delete_bucket_encryption(
        &self,
        req: S3Request<DeleteBucketEncryptionInput>,
    ) -> S3Result<S3Response<DeleteBucketEncryptionOutput>> {
        self.backend.delete_bucket_encryption(req).await
    }

    // ---- Bucket logging ----
    // Pure delegation: access-logging configuration passes straight through.
    async fn get_bucket_logging(
        &self,
        req: S3Request<GetBucketLoggingInput>,
    ) -> S3Result<S3Response<GetBucketLoggingOutput>> {
        self.backend.get_bucket_logging(req).await
    }
    async fn put_bucket_logging(
        &self,
        req: S3Request<PutBucketLoggingInput>,
    ) -> S3Result<S3Response<PutBucketLoggingOutput>> {
        self.backend.put_bucket_logging(req).await
    }
5423
5424    // ---- Bucket notification (v0.6 #35) ----
5425    //
5426    // When a `NotificationManager` is attached, S4 itself owns per-bucket
5427    // notification configurations and the PUT / GET handlers route through
5428    // the manager. The wire DTO's queue / topic configurations map onto
5429    // S4's `Destination::Sqs` / `Destination::Sns`; LambdaFunction and
5430    // EventBridge configurations are accepted on PUT but silently dropped
5431    // (out of scope for v0.6 #35). When no manager is attached the legacy
5432    // backend-passthrough behaviour applies.
5433    async fn get_bucket_notification_configuration(
5434        &self,
5435        req: S3Request<GetBucketNotificationConfigurationInput>,
5436    ) -> S3Result<S3Response<GetBucketNotificationConfigurationOutput>> {
5437        if let Some(mgr) = self.notifications.as_ref() {
5438            let cfg = mgr.get(&req.input.bucket).unwrap_or_default();
5439            let dto = notif_to_dto(&cfg);
5440            return Ok(S3Response::new(GetBucketNotificationConfigurationOutput {
5441                event_bridge_configuration: dto.event_bridge_configuration,
5442                lambda_function_configurations: dto.lambda_function_configurations,
5443                queue_configurations: dto.queue_configurations,
5444                topic_configurations: dto.topic_configurations,
5445            }));
5446        }
5447        self.backend
5448            .get_bucket_notification_configuration(req)
5449            .await
5450    }
5451    async fn put_bucket_notification_configuration(
5452        &self,
5453        req: S3Request<PutBucketNotificationConfigurationInput>,
5454    ) -> S3Result<S3Response<PutBucketNotificationConfigurationOutput>> {
5455        if let Some(mgr) = self.notifications.as_ref() {
5456            let cfg = notif_from_dto(&req.input.notification_configuration);
5457            mgr.put(&req.input.bucket, cfg);
5458            return Ok(S3Response::new(
5459                PutBucketNotificationConfigurationOutput::default(),
5460            ));
5461        }
5462        self.backend
5463            .put_bucket_notification_configuration(req)
5464            .await
5465    }
5466
    // ---- Bucket request payment ----
    // Pure delegation: requester-pays configuration is backend-owned.
    async fn get_bucket_request_payment(
        &self,
        req: S3Request<GetBucketRequestPaymentInput>,
    ) -> S3Result<S3Response<GetBucketRequestPaymentOutput>> {
        self.backend.get_bucket_request_payment(req).await
    }
    async fn put_bucket_request_payment(
        &self,
        req: S3Request<PutBucketRequestPaymentInput>,
    ) -> S3Result<S3Response<PutBucketRequestPaymentOutput>> {
        self.backend.put_bucket_request_payment(req).await
    }

    // ---- Bucket website ----
    // Pure delegation: static-website hosting configuration is backend-owned.
    async fn get_bucket_website(
        &self,
        req: S3Request<GetBucketWebsiteInput>,
    ) -> S3Result<S3Response<GetBucketWebsiteOutput>> {
        self.backend.get_bucket_website(req).await
    }
    async fn put_bucket_website(
        &self,
        req: S3Request<PutBucketWebsiteInput>,
    ) -> S3Result<S3Response<PutBucketWebsiteOutput>> {
        self.backend.put_bucket_website(req).await
    }
    async fn delete_bucket_website(
        &self,
        req: S3Request<DeleteBucketWebsiteInput>,
    ) -> S3Result<S3Response<DeleteBucketWebsiteOutput>> {
        self.backend.delete_bucket_website(req).await
    }
5500
5501    // ---- Bucket replication (v0.6 #40) ----
5502    async fn get_bucket_replication(
5503        &self,
5504        req: S3Request<GetBucketReplicationInput>,
5505    ) -> S3Result<S3Response<GetBucketReplicationOutput>> {
5506        if let Some(mgr) = self.replication.as_ref() {
5507            return match mgr.get(&req.input.bucket) {
5508                Some(cfg) => Ok(S3Response::new(GetBucketReplicationOutput {
5509                    replication_configuration: Some(replication_to_dto(&cfg)),
5510                })),
5511                None => Err(S3Error::with_message(
5512                    S3ErrorCode::Custom("ReplicationConfigurationNotFoundError".into()),
5513                    format!(
5514                        "no replication configuration on bucket {}",
5515                        req.input.bucket
5516                    ),
5517                )),
5518            };
5519        }
5520        self.backend.get_bucket_replication(req).await
5521    }
5522    async fn put_bucket_replication(
5523        &self,
5524        req: S3Request<PutBucketReplicationInput>,
5525    ) -> S3Result<S3Response<PutBucketReplicationOutput>> {
5526        if let Some(mgr) = self.replication.as_ref() {
5527            let cfg = replication_from_dto(&req.input.replication_configuration);
5528            mgr.put(&req.input.bucket, cfg);
5529            return Ok(S3Response::new(PutBucketReplicationOutput::default()));
5530        }
5531        self.backend.put_bucket_replication(req).await
5532    }
5533    async fn delete_bucket_replication(
5534        &self,
5535        req: S3Request<DeleteBucketReplicationInput>,
5536    ) -> S3Result<S3Response<DeleteBucketReplicationOutput>> {
5537        if let Some(mgr) = self.replication.as_ref() {
5538            mgr.delete(&req.input.bucket);
5539            return Ok(S3Response::new(DeleteBucketReplicationOutput::default()));
5540        }
5541        self.backend.delete_bucket_replication(req).await
5542    }
5543
    // ---- Bucket accelerate ----
    // Pure delegation: transfer-acceleration configuration is backend-owned.
    async fn get_bucket_accelerate_configuration(
        &self,
        req: S3Request<GetBucketAccelerateConfigurationInput>,
    ) -> S3Result<S3Response<GetBucketAccelerateConfigurationOutput>> {
        self.backend.get_bucket_accelerate_configuration(req).await
    }
    async fn put_bucket_accelerate_configuration(
        &self,
        req: S3Request<PutBucketAccelerateConfigurationInput>,
    ) -> S3Result<S3Response<PutBucketAccelerateConfigurationOutput>> {
        self.backend.put_bucket_accelerate_configuration(req).await
    }

    // ---- Bucket ownership controls ----
    // Pure delegation: object-ownership configuration is backend-owned.
    async fn get_bucket_ownership_controls(
        &self,
        req: S3Request<GetBucketOwnershipControlsInput>,
    ) -> S3Result<S3Response<GetBucketOwnershipControlsOutput>> {
        self.backend.get_bucket_ownership_controls(req).await
    }
    async fn put_bucket_ownership_controls(
        &self,
        req: S3Request<PutBucketOwnershipControlsInput>,
    ) -> S3Result<S3Response<PutBucketOwnershipControlsOutput>> {
        self.backend.put_bucket_ownership_controls(req).await
    }
    async fn delete_bucket_ownership_controls(
        &self,
        req: S3Request<DeleteBucketOwnershipControlsInput>,
    ) -> S3Result<S3Response<DeleteBucketOwnershipControlsOutput>> {
        self.backend.delete_bucket_ownership_controls(req).await
    }

    // ---- Public access block ----
    // Pure delegation: public-access-block configuration is backend-owned.
    async fn get_public_access_block(
        &self,
        req: S3Request<GetPublicAccessBlockInput>,
    ) -> S3Result<S3Response<GetPublicAccessBlockOutput>> {
        self.backend.get_public_access_block(req).await
    }
    async fn put_public_access_block(
        &self,
        req: S3Request<PutPublicAccessBlockInput>,
    ) -> S3Result<S3Response<PutPublicAccessBlockOutput>> {
        self.backend.put_public_access_block(req).await
    }
    async fn delete_public_access_block(
        &self,
        req: S3Request<DeletePublicAccessBlockInput>,
    ) -> S3Result<S3Response<DeletePublicAccessBlockOutput>> {
        self.backend.delete_public_access_block(req).await
    }
5597
5598    // ====================================================================
5599    // v0.6 #41: S3 Select — server-side SQL filter on object body.
5600    //
5601    // Fetch the object via the regular `get_object` path (so SSE-C /
5602    // SSE-S4 / SSE-KMS / S4 codec all decompress + decrypt transparently),
5603    // run a small SQL subset (CSV + JSON Lines, equality / inequality /
5604    // LIKE / AND / OR / NOT) over the in-memory body, and stream the
5605    // matched rows back as AWS event-stream `Records` + `Stats` + `End`
5606    // frames.
5607    //
5608    // Limitations (deliberate, documented):
5609    //   - Parquet input is rejected with NotImplemented.
5610    //   - Aggregates / GROUP BY / JOIN / ORDER BY / LIMIT are rejected at
5611    //     parse time as InvalidRequest (s3s 0.13 doesn't expose AWS's
5612    //     domain-specific `InvalidSqlExpression` code).
5613    //   - The body is fully buffered before SQL evaluation (S3 Select
5614    //     streaming-during-evaluation is v0.7 scope).
5615    //   - GPU-accelerated WHERE evaluation is stubbed out (always None).
5616    async fn select_object_content(
5617        &self,
5618        req: S3Request<SelectObjectContentInput>,
5619    ) -> S3Result<S3Response<SelectObjectContentOutput>> {
5620        use crate::select::{
5621            EventStreamWriter, SelectInputFormat, SelectOutputFormat, run_select_csv,
5622            run_select_jsonlines,
5623        };
5624
5625        let select_bucket = req.input.bucket.clone();
5626        let select_key = req.input.key.clone();
5627        self.enforce_rate_limit(&req, &select_bucket)?;
5628        self.enforce_policy(&req, "s3:GetObject", &select_bucket, Some(&select_key))?;
5629
5630        let request = req.input.request;
5631        let sql = request.expression.clone();
5632        if request.expression_type.as_str() != "SQL" {
5633            return Err(S3Error::with_message(
5634                S3ErrorCode::InvalidExpressionType,
5635                format!(
5636                    "ExpressionType must be SQL, got: {}",
5637                    request.expression_type.as_str()
5638                ),
5639            ));
5640        }
5641
5642        let input_format = if let Some(_json) = request.input_serialization.json.as_ref() {
5643            SelectInputFormat::JsonLines
5644        } else if let Some(csv) = request.input_serialization.csv.as_ref() {
5645            let has_header = csv
5646                .file_header_info
5647                .as_ref()
5648                .map(|h| {
5649                    let s = h.as_str();
5650                    s.eq_ignore_ascii_case("USE") || s.eq_ignore_ascii_case("IGNORE")
5651                })
5652                .unwrap_or(false);
5653            let delim = csv
5654                .field_delimiter
5655                .as_deref()
5656                .and_then(|s| s.chars().next())
5657                .unwrap_or(',');
5658            SelectInputFormat::Csv {
5659                has_header,
5660                delimiter: delim,
5661            }
5662        } else if request.input_serialization.parquet.is_some() {
5663            return Err(S3Error::with_message(
5664                S3ErrorCode::NotImplemented,
5665                "Parquet input is not supported by this S3 Select implementation (v0.6: CSV / JSON Lines only)",
5666            ));
5667        } else {
5668            return Err(S3Error::with_message(
5669                S3ErrorCode::InvalidRequest,
5670                "InputSerialization requires exactly one of CSV / JSON / Parquet",
5671            ));
5672        };
5673        if let Some(ct) = request.input_serialization.compression_type.as_ref()
5674            && !ct.as_str().eq_ignore_ascii_case("NONE")
5675        {
5676            return Err(S3Error::with_message(
5677                S3ErrorCode::NotImplemented,
5678                format!(
5679                    "InputSerialization CompressionType={} is not supported (v0.6: NONE only)",
5680                    ct.as_str()
5681                ),
5682            ));
5683        }
5684
5685        let output_format = if request.output_serialization.json.is_some() {
5686            SelectOutputFormat::Json
5687        } else if request.output_serialization.csv.is_some() {
5688            SelectOutputFormat::Csv
5689        } else {
5690            return Err(S3Error::with_message(
5691                S3ErrorCode::InvalidRequest,
5692                "OutputSerialization requires exactly one of CSV / JSON",
5693            ));
5694        };
5695
5696        let get_input = GetObjectInput {
5697            bucket: select_bucket.clone(),
5698            key: select_key.clone(),
5699            sse_customer_algorithm: req.input.sse_customer_algorithm.clone(),
5700            sse_customer_key: req.input.sse_customer_key.clone(),
5701            sse_customer_key_md5: req.input.sse_customer_key_md5.clone(),
5702            ..Default::default()
5703        };
5704        let get_req = S3Request {
5705            input: get_input,
5706            method: http::Method::GET,
5707            uri: format!("/{}/{}", select_bucket, select_key)
5708                .parse()
5709                .map_err(|e| {
5710                    S3Error::with_message(
5711                        S3ErrorCode::InternalError,
5712                        format!("constructing inner GET URI: {e}"),
5713                    )
5714                })?,
5715            headers: http::HeaderMap::new(),
5716            extensions: http::Extensions::new(),
5717            credentials: req.credentials.clone(),
5718            region: req.region.clone(),
5719            service: req.service.clone(),
5720            trailing_headers: None,
5721        };
5722        let mut get_resp = self.get_object(get_req).await?;
5723        let blob = get_resp.output.body.take().ok_or_else(|| {
5724            S3Error::with_message(
5725                S3ErrorCode::InternalError,
5726                "Select: object body was empty after GET",
5727            )
5728        })?;
5729        let body_bytes = crate::blob::collect_blob(blob, self.max_body_bytes)
5730            .await
5731            .map_err(internal("collect Select body"))?;
5732        let scanned = body_bytes.len() as u64;
5733
5734        let matched_payload = match input_format {
5735            SelectInputFormat::JsonLines => run_select_jsonlines(&sql, &body_bytes, output_format)
5736                .map_err(|e| select_error_to_s3(e, "JSON Lines"))?,
5737            SelectInputFormat::Csv { .. } => {
5738                run_select_csv(&sql, &body_bytes, input_format, output_format)
5739                    .map_err(|e| select_error_to_s3(e, "CSV"))?
5740            }
5741        };
5742
5743        let returned = matched_payload.len() as u64;
5744        let processed = scanned;
5745        let mut events: Vec<S3Result<SelectObjectContentEvent>> = Vec::with_capacity(3);
5746        if !matched_payload.is_empty() {
5747            events.push(Ok(SelectObjectContentEvent::Records(RecordsEvent {
5748                payload: Some(bytes::Bytes::from(matched_payload)),
5749            })));
5750        }
5751        events.push(Ok(SelectObjectContentEvent::Stats(StatsEvent {
5752            details: Some(Stats {
5753                bytes_scanned: Some(scanned as i64),
5754                bytes_processed: Some(processed as i64),
5755                bytes_returned: Some(returned as i64),
5756            }),
5757        })));
5758        events.push(Ok(SelectObjectContentEvent::End(EndEvent {})));
5759        // Touch EventStreamWriter so the public API stays linked into the
5760        // build (the actual wire framing is delegated to s3s).
5761        let _writer = EventStreamWriter::new();
5762
5763        let stream = SelectObjectContentEventStream::new(futures::stream::iter(events));
5764        let output = SelectObjectContentOutput {
5765            payload: Some(stream),
5766        };
5767        Ok(S3Response::new(output))
5768    }
5769
5770    // ---- Bucket Inventory configuration (v0.6 #36) ----
5771    //
5772    // When an `InventoryManager` is attached, S4-server owns the
5773    // configuration store and these handlers no longer pass through to
5774    // the backend. The mapping between the s3s-typed
5775    // `InventoryConfiguration` and the inventory module's internal
5776    // `InventoryConfig` is intentionally lossy: only the fields S4
5777    // actually uses for periodic CSV emission survive the round trip
5778    // (id, source bucket, destination bucket / prefix, format, included
5779    // versions, schedule frequency). Optional fields, encryption, and
    // filter prefixes are accepted on PUT but dropped; GET re-renders a
    // best-effort default-shape `InventoryConfiguration` so the client
    // still sees a well-formed response.
5783    async fn put_bucket_inventory_configuration(
5784        &self,
5785        req: S3Request<PutBucketInventoryConfigurationInput>,
5786    ) -> S3Result<S3Response<PutBucketInventoryConfigurationOutput>> {
5787        if let Some(mgr) = self.inventory.as_ref() {
5788            let cfg = inv_from_dto(
5789                &req.input.bucket,
5790                &req.input.id,
5791                &req.input.inventory_configuration,
5792            );
5793            mgr.put(cfg);
5794            return Ok(S3Response::new(
5795                PutBucketInventoryConfigurationOutput::default(),
5796            ));
5797        }
5798        self.backend.put_bucket_inventory_configuration(req).await
5799    }
5800
5801    async fn get_bucket_inventory_configuration(
5802        &self,
5803        req: S3Request<GetBucketInventoryConfigurationInput>,
5804    ) -> S3Result<S3Response<GetBucketInventoryConfigurationOutput>> {
5805        if let Some(mgr) = self.inventory.as_ref() {
5806            let cfg = mgr.get(&req.input.bucket, &req.input.id);
5807            if let Some(cfg) = cfg {
5808                let out = GetBucketInventoryConfigurationOutput {
5809                    inventory_configuration: Some(inv_to_dto(&cfg)),
5810                };
5811                return Ok(S3Response::new(out));
5812            }
5813            // AWS returns `NoSuchConfiguration` (404) when the id has no
5814            // matching inventory configuration on the bucket. The
5815            // generated `S3ErrorCode` enum doesn't expose a typed variant
5816            // for this code, so we round-trip through `from_bytes` which
5817            // wraps unknown codes as `Custom(...)` (= the AWS-canonical
5818            // error-code string survives into the XML response envelope).
5819            let code =
5820                S3ErrorCode::from_bytes(b"NoSuchConfiguration").unwrap_or(S3ErrorCode::NoSuchKey);
5821            return Err(S3Error::with_message(
5822                code,
5823                format!(
5824                    "no inventory configuration with id={} on bucket={}",
5825                    req.input.id, req.input.bucket
5826                ),
5827            ));
5828        }
5829        self.backend.get_bucket_inventory_configuration(req).await
5830    }
5831
5832    async fn list_bucket_inventory_configurations(
5833        &self,
5834        req: S3Request<ListBucketInventoryConfigurationsInput>,
5835    ) -> S3Result<S3Response<ListBucketInventoryConfigurationsOutput>> {
5836        if let Some(mgr) = self.inventory.as_ref() {
5837            let list = mgr.list_for_bucket(&req.input.bucket);
5838            let dto_list: Vec<InventoryConfiguration> = list.iter().map(inv_to_dto).collect();
5839            let out = ListBucketInventoryConfigurationsOutput {
5840                continuation_token: req.input.continuation_token.clone(),
5841                inventory_configuration_list: if dto_list.is_empty() {
5842                    None
5843                } else {
5844                    Some(dto_list)
5845                },
5846                is_truncated: Some(false),
5847                next_continuation_token: None,
5848            };
5849            return Ok(S3Response::new(out));
5850        }
5851        self.backend.list_bucket_inventory_configurations(req).await
5852    }
5853
5854    async fn delete_bucket_inventory_configuration(
5855        &self,
5856        req: S3Request<DeleteBucketInventoryConfigurationInput>,
5857    ) -> S3Result<S3Response<DeleteBucketInventoryConfigurationOutput>> {
5858        if let Some(mgr) = self.inventory.as_ref() {
5859            mgr.delete(&req.input.bucket, &req.input.id);
5860            return Ok(S3Response::new(
5861                DeleteBucketInventoryConfigurationOutput::default(),
5862            ));
5863        }
5864        self.backend
5865            .delete_bucket_inventory_configuration(req)
5866            .await
5867    }
5868}
5869
5870// ---------------------------------------------------------------------------
5871// v0.6 #36: Convert between the s3s-typed `InventoryConfiguration` (the wire
5872// surface) and our internal `crate::inventory::InventoryConfig`. Only the
5873// fields S4 actually uses for CSV emission survive the round trip; the
5874// missing fields (filter prefix, optional fields, encryption) are dropped on
5875// PUT and re-rendered as the AWS-default shape on GET so the client sees a
5876// well-formed `InventoryConfiguration`.
5877// ---------------------------------------------------------------------------
5878
5879fn inv_from_dto(
5880    bucket: &str,
5881    id: &str,
5882    dto: &InventoryConfiguration,
5883) -> crate::inventory::InventoryConfig {
5884    let frequency_hours = match dto.schedule.frequency.as_str() {
5885        "Weekly" => 24 * 7,
5886        // Daily is the default; anything S4 doesn't recognise (incl.
5887        // empty, which is the s3s-default) maps to Daily so the
5888        // operator's PUT doesn't silently turn into a no-op cadence.
5889        _ => 24,
5890    };
5891    // Parquet/ORC are not supported (issue #36 scope); we still accept
5892    // the PUT so callers don't fail-loud, but we record CSV and rely on
5893    // the operator catching the discrepancy on GET.
5894    let format = crate::inventory::InventoryFormat::Csv;
5895    crate::inventory::InventoryConfig {
5896        id: id.to_owned(),
5897        bucket: bucket.to_owned(),
5898        destination_bucket: dto.destination.s3_bucket_destination.bucket.clone(),
5899        destination_prefix: dto
5900            .destination
5901            .s3_bucket_destination
5902            .prefix
5903            .clone()
5904            .unwrap_or_default(),
5905        frequency_hours,
5906        format,
5907        included_object_versions: crate::inventory::IncludedVersions::from_aws_str(
5908            dto.included_object_versions.as_str(),
5909        ),
5910    }
5911}
5912
5913fn inv_to_dto(cfg: &crate::inventory::InventoryConfig) -> InventoryConfiguration {
5914    InventoryConfiguration {
5915        id: cfg.id.clone(),
5916        is_enabled: true,
5917        included_object_versions: InventoryIncludedObjectVersions::from(
5918            cfg.included_object_versions.as_aws_str().to_owned(),
5919        ),
5920        destination: InventoryDestination {
5921            s3_bucket_destination: InventoryS3BucketDestination {
5922                account_id: None,
5923                bucket: cfg.destination_bucket.clone(),
5924                encryption: None,
5925                format: InventoryFormat::from(cfg.format.as_aws_str().to_owned()),
5926                prefix: if cfg.destination_prefix.is_empty() {
5927                    None
5928                } else {
5929                    Some(cfg.destination_prefix.clone())
5930                },
5931            },
5932        },
5933        schedule: InventorySchedule {
5934            // `frequency_hours == 168` -> Weekly; everything else maps to
5935            // Daily for the wire response (the manager keeps the precise
5936            // hour count internally for due-checking).
5937            frequency: InventoryFrequency::from(
5938                if cfg.frequency_hours == 24 * 7 {
5939                    "Weekly"
5940                } else {
5941                    "Daily"
5942                }
5943                .to_owned(),
5944            ),
5945        },
5946        filter: None,
5947        optional_fields: None,
5948    }
5949}
5950
5951// ---------------------------------------------------------------------------
5952// v0.6 #35: Convert between the s3s-typed `NotificationConfiguration` (the
5953// wire surface) and our internal `crate::notifications::NotificationConfig`.
5954//
5955// We support TopicConfiguration (-> Destination::Sns) and QueueConfiguration
5956// (-> Destination::Sqs). LambdaFunction and EventBridge configurations are
5957// silently dropped on PUT (out of scope for v0.6 #35); the GET response only
5958// surfaces topic / queue rules.
5959//
5960// The webhook destination has no AWS-native wire form: operators configure
5961// webhooks via the JSON snapshot file (`--notifications-state-file`) or by
5962// poking `NotificationManager::put` directly from a custom binary. This
5963// keeps the wire surface AWS-compatible while still letting the always-
5964// available `Webhook` destination be reachable.
5965// ---------------------------------------------------------------------------
5966
5967fn notif_from_dto(dto: &NotificationConfiguration) -> crate::notifications::NotificationConfig {
5968    let mut rules: Vec<crate::notifications::NotificationRule> = Vec::new();
5969    if let Some(topics) = dto.topic_configurations.as_ref() {
5970        for (idx, t) in topics.iter().enumerate() {
5971            let events = events_from_dto(&t.events);
5972            let (prefix, suffix) = filter_from_dto(t.filter.as_ref());
5973            rules.push(crate::notifications::NotificationRule {
5974                id: t.id.clone().unwrap_or_else(|| format!("topic-{idx}")),
5975                events,
5976                destination: crate::notifications::Destination::Sns {
5977                    topic_arn: t.topic_arn.clone(),
5978                },
5979                filter_prefix: prefix,
5980                filter_suffix: suffix,
5981            });
5982        }
5983    }
5984    if let Some(queues) = dto.queue_configurations.as_ref() {
5985        for (idx, q) in queues.iter().enumerate() {
5986            let events = events_from_dto(&q.events);
5987            let (prefix, suffix) = filter_from_dto(q.filter.as_ref());
5988            rules.push(crate::notifications::NotificationRule {
5989                id: q.id.clone().unwrap_or_else(|| format!("queue-{idx}")),
5990                events,
5991                destination: crate::notifications::Destination::Sqs {
5992                    queue_arn: q.queue_arn.clone(),
5993                },
5994                filter_prefix: prefix,
5995                filter_suffix: suffix,
5996            });
5997        }
5998    }
5999    crate::notifications::NotificationConfig { rules }
6000}
6001
6002fn notif_to_dto(cfg: &crate::notifications::NotificationConfig) -> NotificationConfiguration {
6003    let mut topics: Vec<TopicConfiguration> = Vec::new();
6004    let mut queues: Vec<QueueConfiguration> = Vec::new();
6005    for rule in &cfg.rules {
6006        let events: Vec<Event> = rule
6007            .events
6008            .iter()
6009            .map(|e| Event::from(e.as_aws_str().to_owned()))
6010            .collect();
6011        let filter = filter_to_dto(rule.filter_prefix.as_deref(), rule.filter_suffix.as_deref());
6012        match &rule.destination {
6013            crate::notifications::Destination::Sns { topic_arn } => {
6014                topics.push(TopicConfiguration {
6015                    events,
6016                    filter,
6017                    id: Some(rule.id.clone()),
6018                    topic_arn: topic_arn.clone(),
6019                });
6020            }
6021            crate::notifications::Destination::Sqs { queue_arn } => {
6022                queues.push(QueueConfiguration {
6023                    events,
6024                    filter,
6025                    id: Some(rule.id.clone()),
6026                    queue_arn: queue_arn.clone(),
6027                });
6028            }
6029            // Webhook destinations have no AWS wire equivalent — they
6030            // round-trip through the JSON snapshot only. Skip them on the
6031            // GET surface (an SDK consumer wouldn't know what to do with
6032            // them anyway).
6033            crate::notifications::Destination::Webhook { .. } => {}
6034        }
6035    }
6036    NotificationConfiguration {
6037        event_bridge_configuration: None,
6038        lambda_function_configurations: None,
6039        queue_configurations: if queues.is_empty() {
6040            None
6041        } else {
6042            Some(queues)
6043        },
6044        topic_configurations: if topics.is_empty() {
6045            None
6046        } else {
6047            Some(topics)
6048        },
6049    }
6050}
6051
6052fn events_from_dto(events: &[Event]) -> Vec<crate::notifications::EventType> {
6053    events
6054        .iter()
6055        .filter_map(|e| crate::notifications::EventType::from_aws_str(e.as_ref()))
6056        .collect()
6057}
6058
6059fn filter_from_dto(
6060    f: Option<&NotificationConfigurationFilter>,
6061) -> (Option<String>, Option<String>) {
6062    let Some(f) = f else {
6063        return (None, None);
6064    };
6065    let Some(key) = f.key.as_ref() else {
6066        return (None, None);
6067    };
6068    let Some(rules) = key.filter_rules.as_ref() else {
6069        return (None, None);
6070    };
6071    let mut prefix = None;
6072    let mut suffix = None;
6073    for r in rules {
6074        let name = r.name.as_ref().map(|n| n.as_str().to_ascii_lowercase());
6075        let value = r.value.clone();
6076        match name.as_deref() {
6077            Some("prefix") => prefix = value,
6078            Some("suffix") => suffix = value,
6079            _ => {}
6080        }
6081    }
6082    (prefix, suffix)
6083}
6084
6085fn filter_to_dto(
6086    prefix: Option<&str>,
6087    suffix: Option<&str>,
6088) -> Option<NotificationConfigurationFilter> {
6089    if prefix.is_none() && suffix.is_none() {
6090        return None;
6091    }
6092    let mut rules: Vec<FilterRule> = Vec::new();
6093    if let Some(p) = prefix {
6094        rules.push(FilterRule {
6095            name: Some(FilterRuleName::from("prefix".to_owned())),
6096            value: Some(p.to_owned()),
6097        });
6098    }
6099    if let Some(s) = suffix {
6100        rules.push(FilterRule {
6101            name: Some(FilterRuleName::from("suffix".to_owned())),
6102            value: Some(s.to_owned()),
6103        });
6104    }
6105    Some(NotificationConfigurationFilter {
6106        key: Some(S3KeyFilter {
6107            filter_rules: Some(rules),
6108        }),
6109    })
6110}
6111
6112// ---------------------------------------------------------------------------
6113// v0.6 #40: Convert between the s3s-typed `ReplicationConfiguration` (the
6114// wire surface) and our internal `crate::replication::ReplicationConfig`.
6115// AWS's `ReplicationRuleFilter` is a sum type — `Prefix | Tag | And { Prefix,
6116// Tags }`; we flatten it into the single `(prefix, tag-vec)` representation
6117// the matcher needs. Sub-blocks v0.6 #40 does not implement
6118// (DeleteMarkerReplication / SourceSelectionCriteria / ReplicationTime /
6119// Metrics / EncryptionConfiguration) round-trip as `None` on GET — operators
6120// who set them on PUT see them silently dropped, mirroring "feature not
6121// supported in this release" semantics.
6122// ---------------------------------------------------------------------------
6123
6124fn replication_from_dto(dto: &ReplicationConfiguration) -> crate::replication::ReplicationConfig {
6125    let rules = dto
6126        .rules
6127        .iter()
6128        .enumerate()
6129        .map(|(idx, r)| {
6130            let id =
6131                r.id.as_ref()
6132                    .map(|s| s.as_str().to_owned())
6133                    .unwrap_or_else(|| format!("rule-{idx}"));
6134            let priority = r.priority.unwrap_or(0).max(0) as u32;
6135            let status_enabled = r.status.as_str() == ReplicationRuleStatus::ENABLED;
6136            let filter = replication_filter_from_dto(r.filter.as_ref(), r.prefix.as_deref());
6137            let destination_bucket = r.destination.bucket.clone();
6138            let destination_storage_class = r
6139                .destination
6140                .storage_class
6141                .as_ref()
6142                .map(|s| s.as_str().to_owned());
6143            crate::replication::ReplicationRule {
6144                id,
6145                priority,
6146                status_enabled,
6147                filter,
6148                destination_bucket,
6149                destination_storage_class,
6150            }
6151        })
6152        .collect();
6153    crate::replication::ReplicationConfig {
6154        role: dto.role.clone(),
6155        rules,
6156    }
6157}
6158
6159fn replication_to_dto(cfg: &crate::replication::ReplicationConfig) -> ReplicationConfiguration {
6160    let rules = cfg
6161        .rules
6162        .iter()
6163        .map(|r| {
6164            let status = if r.status_enabled {
6165                ReplicationRuleStatus::from_static(ReplicationRuleStatus::ENABLED)
6166            } else {
6167                ReplicationRuleStatus::from_static(ReplicationRuleStatus::DISABLED)
6168            };
6169            let destination = Destination {
6170                access_control_translation: None,
6171                account: None,
6172                bucket: r.destination_bucket.clone(),
6173                encryption_configuration: None,
6174                metrics: None,
6175                replication_time: None,
6176                storage_class: r
6177                    .destination_storage_class
6178                    .as_ref()
6179                    .map(|s| StorageClass::from(s.clone())),
6180            };
6181            let filter = Some(replication_filter_to_dto(&r.filter));
6182            ReplicationRule {
6183                delete_marker_replication: None,
6184                destination,
6185                existing_object_replication: None,
6186                filter,
6187                id: Some(r.id.clone()),
6188                prefix: None,
6189                priority: Some(r.priority as i32),
6190                source_selection_criteria: None,
6191                status,
6192            }
6193        })
6194        .collect();
6195    ReplicationConfiguration {
6196        role: cfg.role.clone(),
6197        rules,
6198    }
6199}
6200
6201fn replication_filter_from_dto(
6202    f: Option<&ReplicationRuleFilter>,
6203    rule_level_prefix: Option<&str>,
6204) -> crate::replication::ReplicationFilter {
6205    let mut prefix: Option<String> = rule_level_prefix.map(str::to_owned);
6206    let mut tags: Vec<(String, String)> = Vec::new();
6207    if let Some(f) = f {
6208        if let Some(p) = f.prefix.as_ref()
6209            && prefix.is_none()
6210        {
6211            prefix = Some(p.clone());
6212        }
6213        if let Some(t) = f.tag.as_ref()
6214            && let (Some(k), Some(v)) = (t.key.as_ref(), t.value.as_ref())
6215        {
6216            tags.push((k.clone(), v.clone()));
6217        }
6218        if let Some(and) = f.and.as_ref() {
6219            if let Some(p) = and.prefix.as_ref()
6220                && prefix.is_none()
6221            {
6222                prefix = Some(p.clone());
6223            }
6224            if let Some(ts) = and.tags.as_ref() {
6225                for t in ts {
6226                    if let (Some(k), Some(v)) = (t.key.as_ref(), t.value.as_ref()) {
6227                        tags.push((k.clone(), v.clone()));
6228                    }
6229                }
6230            }
6231        }
6232    }
6233    crate::replication::ReplicationFilter { prefix, tags }
6234}
6235
6236fn replication_filter_to_dto(f: &crate::replication::ReplicationFilter) -> ReplicationRuleFilter {
6237    if f.tags.is_empty() {
6238        ReplicationRuleFilter {
6239            and: None,
6240            prefix: f.prefix.clone(),
6241            tag: None,
6242        }
6243    } else if f.tags.len() == 1 && f.prefix.is_none() {
6244        let (k, v) = &f.tags[0];
6245        ReplicationRuleFilter {
6246            and: None,
6247            prefix: None,
6248            tag: Some(Tag {
6249                key: Some(k.clone()),
6250                value: Some(v.clone()),
6251            }),
6252        }
6253    } else {
6254        let tags: Vec<Tag> = f
6255            .tags
6256            .iter()
6257            .map(|(k, v)| Tag {
6258                key: Some(k.clone()),
6259                value: Some(v.clone()),
6260            })
6261            .collect();
6262        ReplicationRuleFilter {
6263            and: Some(ReplicationRuleAndOperator {
6264                prefix: f.prefix.clone(),
6265                tags: Some(tags),
6266            }),
6267            prefix: None,
6268            tag: None,
6269        }
6270    }
6271}
6272
6273// ---------------------------------------------------------------------------
6274// v0.6 #37: Convert between the s3s-typed `BucketLifecycleConfiguration`
6275// (the wire surface) and our internal `crate::lifecycle::LifecycleConfig`.
6276// The internal representation flattens AWS's "Filter | And" disjunction
6277// into a single `LifecycleFilter` struct of optional fields plus a tag
6278// vector. Fields S4's evaluator does not consume
6279// (`expired_object_delete_marker`, `noncurrent_version_transitions`,
6280// `transition_default_minimum_object_size`, the storage class on the
6281// noncurrent expiration) are dropped on PUT and re-rendered as their
6282// AWS-default shape on GET so the client always sees a well-formed
6283// configuration.
6284// ---------------------------------------------------------------------------
6285
6286fn dto_lifecycle_to_internal(
6287    dto: &BucketLifecycleConfiguration,
6288) -> crate::lifecycle::LifecycleConfig {
6289    crate::lifecycle::LifecycleConfig {
6290        rules: dto.rules.iter().map(dto_rule_to_internal).collect(),
6291    }
6292}
6293
6294fn dto_rule_to_internal(rule: &LifecycleRule) -> crate::lifecycle::LifecycleRule {
6295    let status = crate::lifecycle::LifecycleStatus::from_aws_str(rule.status.as_str());
6296    let filter = rule
6297        .filter
6298        .as_ref()
6299        .map(dto_filter_to_internal)
6300        .unwrap_or_default();
6301    let expiration_days = rule
6302        .expiration
6303        .as_ref()
6304        .and_then(|e| e.days)
6305        .and_then(|d| u32::try_from(d).ok());
6306    let expiration_date = rule
6307        .expiration
6308        .as_ref()
6309        .and_then(|e| e.date.as_ref())
6310        .and_then(timestamp_to_chrono_utc);
6311    let transitions: Vec<crate::lifecycle::TransitionRule> = rule
6312        .transitions
6313        .as_ref()
6314        .map(|ts| {
6315            ts.iter()
6316                .filter_map(|t| {
6317                    let days = u32::try_from(t.days?).ok()?;
6318                    let storage_class = t.storage_class.as_ref()?.as_str().to_owned();
6319                    Some(crate::lifecycle::TransitionRule {
6320                        days,
6321                        storage_class,
6322                    })
6323                })
6324                .collect()
6325        })
6326        .unwrap_or_default();
6327    let noncurrent_version_expiration_days = rule
6328        .noncurrent_version_expiration
6329        .as_ref()
6330        .and_then(|n| n.noncurrent_days)
6331        .and_then(|d| u32::try_from(d).ok());
6332    let abort_incomplete_multipart_upload_days = rule
6333        .abort_incomplete_multipart_upload
6334        .as_ref()
6335        .and_then(|a| a.days_after_initiation)
6336        .and_then(|d| u32::try_from(d).ok());
6337    crate::lifecycle::LifecycleRule {
6338        id: rule.id.clone().unwrap_or_default(),
6339        status,
6340        filter,
6341        expiration_days,
6342        expiration_date,
6343        transitions,
6344        noncurrent_version_expiration_days,
6345        abort_incomplete_multipart_upload_days,
6346    }
6347}
6348
6349fn dto_filter_to_internal(filter: &LifecycleRuleFilter) -> crate::lifecycle::LifecycleFilter {
6350    let mut prefix = filter.prefix.clone();
6351    let mut tags: Vec<(String, String)> = Vec::new();
6352    let mut size_gt: Option<u64> = filter
6353        .object_size_greater_than
6354        .and_then(|n| u64::try_from(n).ok());
6355    let mut size_lt: Option<u64> = filter
6356        .object_size_less_than
6357        .and_then(|n| u64::try_from(n).ok());
6358    if let Some(t) = &filter.tag
6359        && let (Some(k), Some(v)) = (t.key.as_ref(), t.value.as_ref())
6360    {
6361        tags.push((k.clone(), v.clone()));
6362    }
6363    if let Some(and) = &filter.and {
6364        if prefix.is_none() {
6365            prefix = and.prefix.clone();
6366        }
6367        if size_gt.is_none() {
6368            size_gt = and
6369                .object_size_greater_than
6370                .and_then(|n| u64::try_from(n).ok());
6371        }
6372        if size_lt.is_none() {
6373            size_lt = and
6374                .object_size_less_than
6375                .and_then(|n| u64::try_from(n).ok());
6376        }
6377        if let Some(ts) = &and.tags {
6378            for t in ts {
6379                if let (Some(k), Some(v)) = (t.key.as_ref(), t.value.as_ref()) {
6380                    tags.push((k.clone(), v.clone()));
6381                }
6382            }
6383        }
6384    }
6385    crate::lifecycle::LifecycleFilter {
6386        prefix,
6387        tags,
6388        object_size_greater_than: size_gt,
6389        object_size_less_than: size_lt,
6390    }
6391}
6392
/// Render an internal lifecycle rule back into the s3s-typed wire DTO.
///
/// Fields the evaluator never stored (`expired_object_delete_marker`,
/// `noncurrent_version_transitions`, transition dates, the legacy
/// rule-level `prefix`) are emitted as their AWS-default `None` shape.
/// The filter is rendered in the most specific wire form that can carry
/// it: bare `Prefix`, bare `Tag`, or the `And` combinator.
fn internal_rule_to_dto(rule: &crate::lifecycle::LifecycleRule) -> LifecycleRule {
    // Expiration element only when a day count or a date survives.
    let expiration = if rule.expiration_days.is_some() || rule.expiration_date.is_some() {
        Some(LifecycleExpiration {
            date: rule.expiration_date.map(chrono_utc_to_timestamp),
            days: rule.expiration_days.map(|d| d as i32),
            expired_object_delete_marker: None,
        })
    } else {
        None
    };
    // An empty transition list becomes an absent element; internal rules
    // are day-based only, so `date` is always None on the wire.
    let transitions: Option<TransitionList> = if rule.transitions.is_empty() {
        None
    } else {
        Some(
            rule.transitions
                .iter()
                .map(|t| Transition {
                    date: None,
                    days: Some(t.days as i32),
                    storage_class: Some(TransitionStorageClass::from(t.storage_class.clone())),
                })
                .collect(),
        )
    };
    let noncurrent_version_expiration =
        rule.noncurrent_version_expiration_days
            .map(|d| NoncurrentVersionExpiration {
                newer_noncurrent_versions: None,
                noncurrent_days: Some(d as i32),
            });
    let abort_incomplete_multipart_upload =
        rule.abort_incomplete_multipart_upload_days
            .map(|d| AbortIncompleteMultipartUpload {
                days_after_initiation: Some(d as i32),
            });
    // Pick the narrowest wire shape the stored filter fits into.
    let filter = if rule.filter.tags.is_empty()
        && rule.filter.object_size_greater_than.is_none()
        && rule.filter.object_size_less_than.is_none()
    {
        // Prefix only (or nothing at all -> absent filter).
        rule.filter.prefix.as_ref().map(|p| LifecycleRuleFilter {
            and: None,
            object_size_greater_than: None,
            object_size_less_than: None,
            prefix: Some(p.clone()),
            tag: None,
        })
    } else if rule.filter.tags.len() == 1
        && rule.filter.prefix.is_none()
        && rule.filter.object_size_greater_than.is_none()
        && rule.filter.object_size_less_than.is_none()
    {
        // Exactly one tag and nothing else: bare Tag filter.
        let (k, v) = rule.filter.tags[0].clone();
        Some(LifecycleRuleFilter {
            and: None,
            object_size_greater_than: None,
            object_size_less_than: None,
            prefix: None,
            tag: Some(Tag {
                key: Some(k),
                value: Some(v),
            }),
        })
    } else {
        // Any other combination needs the And combinator. Size bounds
        // narrow u64 -> i64; values that don't fit are dropped.
        let tags = if rule.filter.tags.is_empty() {
            None
        } else {
            Some(
                rule.filter
                    .tags
                    .iter()
                    .map(|(k, v)| Tag {
                        key: Some(k.clone()),
                        value: Some(v.clone()),
                    })
                    .collect(),
            )
        };
        Some(LifecycleRuleFilter {
            and: Some(LifecycleRuleAndOperator {
                object_size_greater_than: rule
                    .filter
                    .object_size_greater_than
                    .and_then(|n| i64::try_from(n).ok()),
                object_size_less_than: rule
                    .filter
                    .object_size_less_than
                    .and_then(|n| i64::try_from(n).ok()),
                prefix: rule.filter.prefix.clone(),
                tags,
            }),
            object_size_greater_than: None,
            object_size_less_than: None,
            prefix: None,
            tag: None,
        })
    };
    LifecycleRule {
        abort_incomplete_multipart_upload,
        expiration,
        filter,
        // An empty stored id round-trips as an absent element.
        id: if rule.id.is_empty() {
            None
        } else {
            Some(rule.id.clone())
        },
        noncurrent_version_expiration,
        noncurrent_version_transitions: None,
        prefix: None,
        status: ExpirationStatus::from(rule.status.as_aws_str().to_owned()),
        transitions,
    }
}
6505
6506// (timestamp <-> chrono helpers `timestamp_to_chrono_utc` /
6507// `chrono_utc_to_timestamp` are defined earlier in this file for the
6508// tagging/notifications work; the lifecycle DTO converters reuse them.)
6509
6510// ---------------------------------------------------------------------------
6511// v0.5 #33: SigV4a (asymmetric ECDSA-P256) integration hook.
6512//
6513// Kept as a self-contained block at the bottom of the file so it doesn't
6514// touch the existing `S4Service` struct, `new()`, or any of the per-op
6515// handlers above. The hook is wired in by the binary at server-build time
6516// as a hyper middleware layer (see `main.rs`), NOT inside `S4Service`.
6517//
6518// Lifecycle:
6519//   1. `SigV4aGate::new(store)` is constructed once at boot from the
6520//      operator-supplied credential directory.
6521//   2. For each incoming request, `SigV4aGate::pre_route(&req,
6522//      &requested_region, &canonical_request_bytes)` is invoked BEFORE
6523//      the request hits the S3 framework. If the request claims SigV4a
//      and verifies, control returns to the framework. Otherwise an
//      error response is produced — 403 (`SignatureDoesNotMatch` /
//      `InvalidAccessKeyId` / `RequestTimeTooSkewed`) or, for
//      malformed date / credential-scope shapes, 400 `InvalidRequest`.
6526//   3. Plain SigV4 (HMAC-SHA256) requests pass through untouched.
6527// ---------------------------------------------------------------------------
6528
6529/// Gate that fronts the S3 service path with SigV4a verification (v0.5 #33).
6530///
6531/// Wraps a [`crate::sigv4a::SigV4aCredentialStore`] and exposes a single
6532/// `pre_route` entry point that returns `Ok(())` for both
6533/// "request is plain SigV4 — pass through" and "request is SigV4a and
6534/// verified", and an `Err(...)` containing a 403-equivalent diagnostic
6535/// otherwise. Cheap to clone (the inner store is `Arc`-backed).
6536///
6537/// v0.8.4 #76 (audit H-6): the gate now enforces an `x-amz-date`
6538/// freshness window (default 15 min, AWS-spec) and a strict credential
6539/// scope shape (`<key>/<YYYYMMDD>/s3/aws4_request`), shutting the
6540/// captured-request replay vector — previously a stolen valid SigV4a
6541/// signature could be replayed indefinitely (including DELETE).
6542#[derive(Debug, Clone)]
6543pub struct SigV4aGate {
6544    store: crate::sigv4a::SharedSigV4aCredentialStore,
6545    /// v0.8.4 #76: how far the request's `x-amz-date` may drift from
6546    /// the server's clock before being rejected with 403
6547    /// `RequestTimeTooSkewed`. Matches the AWS S3 spec default of
6548    /// 15 min when constructed via [`SigV4aGate::new`]; the operator
6549    /// can override via [`SigV4aGate::with_skew_tolerance`] (CLI flag
6550    /// `--sigv4a-skew-tolerance-seconds`).
6551    skew_tolerance: chrono::Duration,
6552}
6553
6554impl SigV4aGate {
6555    /// Default `x-amz-date` skew tolerance — 15 min, matching AWS S3.
6556    pub const DEFAULT_SKEW_TOLERANCE_SECS: i64 = 900;
6557
6558    #[must_use]
6559    pub fn new(store: crate::sigv4a::SharedSigV4aCredentialStore) -> Self {
6560        Self {
6561            store,
6562            skew_tolerance: chrono::Duration::seconds(Self::DEFAULT_SKEW_TOLERANCE_SECS),
6563        }
6564    }
6565
6566    /// v0.8.4 #76: override the `x-amz-date` skew tolerance (default
6567    /// 15 min). Operators can widen this for high-clock-drift
6568    /// environments or tighten it for compliance regimes that demand
6569    /// stricter freshness.
6570    #[must_use]
6571    pub fn with_skew_tolerance(mut self, skew: chrono::Duration) -> Self {
6572        self.skew_tolerance = skew;
6573        self
6574    }
6575
6576    /// Read the configured skew tolerance — exposed mostly for test +
6577    /// observability use.
6578    #[must_use]
6579    pub fn skew_tolerance(&self) -> chrono::Duration {
6580        self.skew_tolerance
6581    }
6582
6583    /// Inspect an incoming HTTP request. Behaviour:
6584    ///
6585    /// - Not SigV4a (no `X-Amz-Region-Set` and no SigV4a `Authorization`
6586    ///   prefix) → returns `Ok(())`; the framework's existing SigV4
6587    ///   path handles the request.
6588    /// - SigV4a + valid signature + region match + fresh x-amz-date
6589    ///   → `Ok(())`.
6590    /// - SigV4a + unknown access-key-id → `Err` with `InvalidAccessKeyId`.
6591    /// - SigV4a + bad signature / region mismatch → `Err` with
6592    ///   `SignatureDoesNotMatch`.
6593    /// - SigV4a + missing or skewed `x-amz-date` → `Err` with one of
6594    ///   the v0.8.4 #76 freshness variants (`RequestTimeTooSkewed`
6595    ///   et al.).
6596    ///
6597    /// `canonical_request_bytes` is the SigV4a string-to-sign (or
6598    /// canonical-request bytes; the caller decides) that the framework
6599    /// has already produced for this request. Keeping it as a parameter
6600    /// instead of rebuilding it inside the hook avoids duplicating the
6601    /// canonicalisation logic.
6602    pub fn pre_route<B>(
6603        &self,
6604        req: &http::Request<B>,
6605        requested_region: &str,
6606        canonical_request_bytes: &[u8],
6607    ) -> Result<(), SigV4aGateError> {
6608        self.pre_route_at(
6609            req,
6610            requested_region,
6611            canonical_request_bytes,
6612            chrono::Utc::now(),
6613        )
6614    }
6615
6616    /// Like [`SigV4aGate::pre_route`] but takes an explicit `now` for
6617    /// tests that need to pin the freshness clock. Production callers
6618    /// use `pre_route` (which calls `chrono::Utc::now()`).
6619    pub fn pre_route_at<B>(
6620        &self,
6621        req: &http::Request<B>,
6622        requested_region: &str,
6623        canonical_request_bytes: &[u8],
6624        now: chrono::DateTime<chrono::Utc>,
6625    ) -> Result<(), SigV4aGateError> {
6626        if !crate::sigv4a::detect(req) {
6627            return Ok(());
6628        }
6629        let auth_hdr = req
6630            .headers()
6631            .get(http::header::AUTHORIZATION)
6632            .and_then(|v| v.to_str().ok())
6633            .ok_or(SigV4aGateError::MissingAuthorization)?;
6634        let parsed = crate::sigv4a::parse_authorization_header(auth_hdr)
6635            .map_err(|_| SigV4aGateError::MalformedAuthorization)?;
6636        let region_set = req
6637            .headers()
6638            .get(crate::sigv4a::REGION_SET_HEADER)
6639            .and_then(|v| v.to_str().ok())
6640            .unwrap_or("*");
6641        let key = self
6642            .store
6643            .get(&parsed.access_key_id)
6644            .ok_or_else(|| SigV4aGateError::UnknownAccessKey(parsed.access_key_id.clone()))?;
6645        // v0.8.4 #76: snapshot the request headers into a
6646        // lowercase-keyed flat map so `verify_request` can do the
6647        // x-amz-date freshness checks without taking a generic
6648        // `HeaderMap` dep. Cheap because the headers list is tiny.
6649        //
6650        // v0.8.5 #84 (audit H-4): detect duplicate header names while
6651        // we flatten — `HashMap::insert` would silently overwrite the
6652        // first value with the second, mirroring the auth-confusion
6653        // vector the canonical-request builder also defends against.
6654        // Reject upfront so the rest of the gate (freshness check,
6655        // ECDSA verify) never sees a half-truncated header set. We
6656        // detect by checking `contains_key` *before* insertion rather
6657        // than by counting via `headers().get_all`, because the
6658        // upstream `HeaderMap` iteration yields each duplicate entry
6659        // as its own (name, value) pair — the second-seen entry is
6660        // exactly what `contains_key` traps.
6661        let mut header_map: std::collections::HashMap<String, String> =
6662            std::collections::HashMap::with_capacity(req.headers().len());
6663        for (name, value) in req.headers() {
6664            if let Ok(v) = value.to_str() {
6665                let lower = name.as_str().to_ascii_lowercase();
6666                if header_map.contains_key(&lower) {
6667                    return Err(SigV4aGateError::Verify(
6668                        crate::sigv4a::SigV4aError::DuplicateSignedHeader { header: lower },
6669                    ));
6670                }
6671                header_map.insert(lower, v.to_string());
6672            }
6673        }
6674        crate::sigv4a::verify_request(
6675            &parsed,
6676            &header_map,
6677            canonical_request_bytes,
6678            key,
6679            region_set,
6680            requested_region,
6681            now,
6682            self.skew_tolerance,
6683        )
6684        .map_err(SigV4aGateError::Verify)?;
6685        Ok(())
6686    }
6687}
6688
/// Failure modes from [`SigV4aGate::pre_route`]. Variants map to either
/// HTTP 403 with an AWS-standard error code (`InvalidAccessKeyId` /
/// `SignatureDoesNotMatch` / `RequestTimeTooSkewed`) or, for malformed
/// date / credential-scope shapes, HTTP 400 `InvalidRequest` — see
/// [`SigV4aGateError::s3_error_code`] and
/// [`SigV4aGateError::http_status`] for the exact mapping.
#[derive(Debug, thiserror::Error)]
pub enum SigV4aGateError {
    /// Request claimed SigV4a but carried no readable `Authorization`
    /// header.
    #[error("missing Authorization header")]
    MissingAuthorization,
    /// `Authorization` header present but not parseable as SigV4a.
    #[error("malformed SigV4a Authorization header")]
    MalformedAuthorization,
    /// The credential's access-key-id is not in the operator store.
    #[error("unknown SigV4a access-key-id: {0}")]
    UnknownAccessKey(String),
    /// Signature / region / freshness verification failed; the wrapped
    /// [`crate::sigv4a::SigV4aError`] carries the precise cause.
    #[error("SigV4a verification failed: {0}")]
    Verify(#[source] crate::sigv4a::SigV4aError),
}
6704
6705impl SigV4aGateError {
6706    /// AWS S3 error code that should accompany the response.
6707    ///
6708    /// v0.8.4 #76 (audit H-6): the freshness check surfaces
6709    /// `RequestTimeTooSkewed` (matches AWS spec); date / scope shape
6710    /// failures surface as `InvalidRequest` (400); other failures stay
6711    /// `SignatureDoesNotMatch` / `InvalidAccessKeyId` (403) so the wire
6712    /// surface stays AWS-compatible.
6713    #[must_use]
6714    pub fn s3_error_code(&self) -> &'static str {
6715        match self {
6716            Self::UnknownAccessKey(_) => "InvalidAccessKeyId",
6717            Self::Verify(crate::sigv4a::SigV4aError::RequestTimeTooSkewed { .. }) => {
6718                "RequestTimeTooSkewed"
6719            }
6720            Self::Verify(
6721                crate::sigv4a::SigV4aError::MissingXAmzDate
6722                | crate::sigv4a::SigV4aError::InvalidDateFormat
6723                | crate::sigv4a::SigV4aError::DateScopeMismatch
6724                | crate::sigv4a::SigV4aError::XAmzDateNotSigned
6725                | crate::sigv4a::SigV4aError::InvalidTerminator
6726                | crate::sigv4a::SigV4aError::WrongService { .. }
6727                | crate::sigv4a::SigV4aError::InvalidCredentialScope,
6728            ) => "InvalidRequest",
6729            _ => "SignatureDoesNotMatch",
6730        }
6731    }
6732
6733    /// HTTP status code to accompany the response. v0.8.4 #76: format
6734    /// errors that are clearly client mistakes (missing / malformed
6735    /// `x-amz-date`, malformed credential scope, wrong service) are
6736    /// surfaced as 400 InvalidRequest; the rest stay 403.
6737    #[must_use]
6738    pub fn http_status(&self) -> http::StatusCode {
6739        match self {
6740            Self::Verify(
6741                crate::sigv4a::SigV4aError::MissingXAmzDate
6742                | crate::sigv4a::SigV4aError::InvalidDateFormat
6743                | crate::sigv4a::SigV4aError::DateScopeMismatch
6744                | crate::sigv4a::SigV4aError::XAmzDateNotSigned
6745                | crate::sigv4a::SigV4aError::InvalidTerminator
6746                | crate::sigv4a::SigV4aError::WrongService { .. }
6747                | crate::sigv4a::SigV4aError::InvalidCredentialScope,
6748            ) => http::StatusCode::BAD_REQUEST,
6749            _ => http::StatusCode::FORBIDDEN,
6750        }
6751    }
6752}
6753
#[cfg(test)]
mod tests {
    use super::*;

    // Coverage map for this module:
    //   - manifest <-> S3-metadata round-trip helpers
    //   - `parse_copy_source_range` header validation
    //   - v0.7  #49: `safe_object_uri` panic-freedom over hostile keys
    //   - v0.8.1 #58: KMS DEK lifetime / `Zeroizing` call shape
    //   - v0.8.5 #86: replication-semaphore back-pressure + flusher
    //     `JoinHandle` abortability
    //   - v0.8.5 #81: dispatcher panic-catch + metrics wiring

    // Happy path: every manifest field survives the metadata encoding.
    #[test]
    fn manifest_roundtrip_via_metadata() {
        let original = ChunkManifest {
            codec: CodecKind::CpuZstd,
            original_size: 1234,
            compressed_size: 567,
            crc32c: 0xdead_beef,
        };
        let mut meta: Option<Metadata> = None;
        write_manifest(&mut meta, &original);
        let extracted = extract_manifest(&meta).expect("manifest must round-trip");
        assert_eq!(extracted.codec, original.codec);
        assert_eq!(extracted.original_size, original.original_size);
        assert_eq!(extracted.compressed_size, original.compressed_size);
        assert_eq!(extracted.crc32c, original.crc32c);
    }

    // No metadata at all → not an S4-compressed object.
    #[test]
    fn missing_metadata_yields_none() {
        let meta: Option<Metadata> = None;
        assert!(extract_manifest(&meta).is_none());
    }

    // A manifest missing any required key must be treated as absent
    // rather than half-parsed.
    #[test]
    fn partial_metadata_yields_none() {
        let mut meta = Metadata::new();
        meta.insert(META_CODEC.into(), "cpu-zstd".into());
        let opt = Some(meta);
        assert!(extract_manifest(&opt).is_none());
    }

    // `x-amz-copy-source-range` parsing: only closed `bytes=N-M` forms
    // with N <= M are legal for upload_part_copy.
    #[test]
    fn parse_copy_source_range_basic() {
        let r = parse_copy_source_range("bytes=10-20").unwrap();
        match r {
            s3s::dto::Range::Int { first, last } => {
                assert_eq!(first, 10);
                assert_eq!(last, Some(20));
            }
            _ => panic!("expected Int range"),
        }
    }

    #[test]
    fn parse_copy_source_range_rejects_inverted() {
        let err = parse_copy_source_range("bytes=20-10").unwrap_err();
        assert!(err.contains("last < first"));
    }

    #[test]
    fn parse_copy_source_range_rejects_missing_prefix() {
        let err = parse_copy_source_range("10-20").unwrap_err();
        assert!(err.contains("must start with 'bytes='"));
    }

    #[test]
    fn parse_copy_source_range_rejects_open_ended() {
        // S3 upload_part_copy spec requires N-M (closed); suffix and
        // open-ended forms are not allowed for this header.
        assert!(parse_copy_source_range("bytes=10-").is_err());
        assert!(parse_copy_source_range("bytes=-10").is_err());
    }

    // v0.7 #49: safe_object_uri must round-trip every legal S3 key
    // (which includes spaces, slashes, control chars, raw UTF-8) into
    // a parseable `http::Uri` instead of panicking like the previous
    // `format!(...).parse().unwrap()` call sites did.

    #[test]
    fn safe_object_uri_basic_ascii() {
        let uri = safe_object_uri("bucket", "key").expect("ascii must be safe");
        assert_eq!(uri.path(), "/bucket/key");
    }

    #[test]
    fn safe_object_uri_encodes_spaces() {
        let uri = safe_object_uri("bucket", "key with spaces").expect("must encode spaces");
        // RFC 3986 path-segment encoding turns ' ' into %20.
        assert!(
            uri.path().contains("%20"),
            "expected percent-encoded space, got {}",
            uri.path()
        );
        assert!(uri.path().starts_with("/bucket/"));
    }

    #[test]
    fn safe_object_uri_preserves_slashes() {
        // S3 keys legally contain '/' as a logical path separator —
        // the helper must NOT escape it (otherwise the synthetic URI
        // changes the perceived hierarchy).
        let uri = safe_object_uri("bucket", "key/with/slashes").expect("slashes must round-trip");
        assert_eq!(uri.path(), "/bucket/key/with/slashes");
    }

    #[test]
    fn safe_object_uri_handles_newline_without_panic() {
        // Newlines are control chars in URIs; whether the result is
        // Ok (encoded as %0A) or Err (parse rejects), the helper
        // MUST NOT panic. Either outcome is acceptable.
        let _ = safe_object_uri("bucket", "key\n");
    }

    #[test]
    fn safe_object_uri_handles_null_byte_without_panic() {
        let _ = safe_object_uri("bucket", "key\0bad");
    }

    #[test]
    fn safe_object_uri_handles_unicode_without_panic() {
        // RTL override, BOM, plain Japanese — none should panic.
        let _ = safe_object_uri("bucket", "rtl\u{202E}override");
        let _ = safe_object_uri("bucket", "\u{FEFF}bom-key");
        let _ = safe_object_uri("bucket", "日本語キー");
    }

    #[test]
    fn safe_object_uri_no_panic_for_every_byte() {
        // Exhaustive byte coverage: 0x00..=0xFF as a 1-byte key.
        // None of these may panic. (0x80..=0xFF are not valid UTF-8
        // by themselves; we go through `String::from_utf8_lossy` so
        // the helper sees a real `&str` regardless of the raw byte.)
        for b in 0u8..=255 {
            let s = String::from_utf8_lossy(&[b]).into_owned();
            let _ = safe_object_uri("bucket", &s);
        }
    }

    /// v0.8.1 #58: smoke test for the DEK-handling shape used by the
    /// SSE-KMS branches of `put_object` and `complete_multipart_upload`.
    /// Mirrors the call pattern (generate_dek → length check → copy
    /// into stack `[u8; 32]` → reborrow as `&[u8; 32]` for `SseSource`)
    /// without spinning up a full `S4Service`.
    ///
    /// The real assertion this guards against is a regression where
    /// the `Zeroizing` wrapper is accidentally dropped before the
    /// stack copy lands (e.g. someone refactors to use
    /// `let dek = kms.generate_dek(...).await?.0; drop(dek); ...`)
    /// or where `&**dek` is rewritten in a way that doesn't compile.
    #[tokio::test]
    async fn kms_dek_lifetime_within_function_scope() {
        use crate::kms::{KmsBackend, LocalKms};
        use std::collections::HashMap;
        use std::path::PathBuf;
        use zeroize::Zeroizing;

        let mut keks = HashMap::new();
        keks.insert("scope".to_string(), [33u8; 32]);
        let kms = LocalKms::from_keks(PathBuf::from("/tmp/kms-scope-test"), keks);

        // Mirror the put_object KMS branch shape exactly.
        let (dek, wrapped) = kms.generate_dek("scope").await.unwrap();
        assert_eq!(dek.len(), 32);
        let mut dek_arr: Zeroizing<[u8; 32]> = Zeroizing::new([0u8; 32]);
        dek_arr.copy_from_slice(&dek);

        // The reborrow used at the SseSource construction site —
        // mirrors the call-site pattern where `let dek_ref: &[u8; 32]`
        // auto-derefs from a `Zeroizing<[u8; 32]>` reference.
        let dek_ref: &[u8; 32] = &dek_arr;
        // Sanity: the reborrow points at the same bytes.
        assert_eq!(dek_ref, &*dek_arr);
        // Wrapped key id flows through unchanged.
        assert_eq!(wrapped.key_id, "scope");

        // At end of scope, both `dek` (Zeroizing<Vec<u8>>) and
        // `dek_arr` (Zeroizing<[u8; 32]>) are dropped, wiping the
        // backing memory. Cannot directly assert the wipe (would be
        // UB to read freed memory), so this test instead enforces
        // that the call shape compiles and executes; the wipe itself
        // is exercised by the `zeroize` crate's own test suite.
    }

    /// v0.8.5 #86 (audit M-2): the replication dispatcher must
    /// `acquire_owned()` a permit from `replication_semaphore` before
    /// kicking off the destination PUT, so a saturated semaphore
    /// back-pressures the in-flight queue depth instead of letting it
    /// grow without bound. We exercise the field directly (initial
    /// permit count, override via `with_replication_max_concurrent`,
    /// permit drop on `Drop`) — the full `spawn_replication_if_matched`
    /// integration is exercised by the existing replication tests in
    /// `tests/feature_e2e.rs` once a `ReplicationManager` is attached.
    #[tokio::test]
    async fn replication_semaphore_caps_concurrent_dispatchers() {
        // Build a minimal `S4Service` directly — no handler path is
        // exercised, only the constructor + setter + accessor shape.
        let registry = Arc::new(
            CodecRegistry::new(CodecKind::Passthrough)
                .with(Arc::new(s4_codec::passthrough::Passthrough)),
        );
        let dispatcher = Arc::new(s4_codec::dispatcher::AlwaysDispatcher(
            CodecKind::Passthrough,
        ));
        let s4 = S4Service::new(NoopBackend, registry, dispatcher);

        // Default cap matches the documented constant.
        assert_eq!(
            s4.replication_semaphore().available_permits(),
            S4Service::<NoopBackend>::DEFAULT_REPLICATION_MAX_CONCURRENT,
            "fresh S4Service must expose DEFAULT_REPLICATION_MAX_CONCURRENT permits"
        );

        // Override via the builder — replaces the underlying `Semaphore`.
        let s4 = s4.with_replication_max_concurrent(2);
        assert_eq!(
            s4.replication_semaphore().available_permits(),
            2,
            "with_replication_max_concurrent(2) must expose exactly 2 permits"
        );

        // Acquiring permits must reduce `available_permits()` and
        // dropping them must restore the count — this is the contract
        // `spawn_replication_if_matched` relies on for back-pressure.
        let sem = Arc::clone(s4.replication_semaphore());
        let p1 = sem.clone().acquire_owned().await.expect("permit 1");
        let p2 = sem.clone().acquire_owned().await.expect("permit 2");
        assert_eq!(
            sem.available_permits(),
            0,
            "two acquired permits must zero `available_permits()`"
        );
        // A third `try_acquire_owned` must fail — the cap is enforced
        // synchronously, no extra spawn slips through.
        assert!(
            sem.clone().try_acquire_owned().is_err(),
            "third acquire must back-pressure: cap was 2"
        );
        drop(p1);
        drop(p2);
        assert_eq!(
            sem.available_permits(),
            2,
            "dropping permits must restore cap"
        );

        // Lower-bound clamp: a 0 cap would deadlock all dispatchers,
        // so the setter clamps it to 1 instead of accepting it
        // (callers are warned in the CLI doc).
        let s4 = s4.with_replication_max_concurrent(0);
        assert_eq!(
            s4.replication_semaphore().available_permits(),
            1,
            "cap=0 must be clamped to 1 to avoid total deadlock"
        );
    }

    /// v0.8.5 #86 (audit M-1): the access-log flusher must return a
    /// `JoinHandle<()>` that the caller can `abort()` on shutdown
    /// without leaving a dangling task. The pre-#86 call site dropped
    /// the handle at end-of-block (silently detaching it); the fix is
    /// hoisting it into a process-lived `Vec` so the graceful-shutdown
    /// branch in `main.rs` can wait for clean exit. This test exercises
    /// the `JoinHandle.abort()` shape directly so a future refactor that
    /// stops returning the handle (or returns a non-abortable wrapper)
    /// trips this regression guard.
    #[tokio::test]
    async fn flusher_handle_can_be_aborted_cleanly() {
        // Stand up a minimal `AccessLog` pointing at a tmp dir so the
        // flusher's `create_dir_all` succeeds. The dir is cleaned up
        // by the OS / test harness; we don't assert on the contents.
        let tmp = std::env::temp_dir().join(format!(
            "s4-86-flusher-{}-{}",
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .map(|d| d.as_nanos())
                .unwrap_or(0)
        ));
        let dest = crate::access_log::AccessLogDest { dir: tmp.clone() };
        let log = crate::access_log::AccessLog::new(dest);
        let handle = log.spawn_flusher(None);
        assert!(
            !handle.is_finished(),
            "freshly-spawned flusher must not yet be finished"
        );
        handle.abort();
        // `await`-ing an aborted handle returns `Err(JoinError)` whose
        // `is_cancelled()` is true.
        let join_result = handle.await;
        assert!(
            join_result.is_err(),
            "aborted flusher must surface JoinError, got Ok"
        );
        assert!(
            join_result.unwrap_err().is_cancelled(),
            "JoinError must report .is_cancelled() = true after abort()"
        );
        let _ = std::fs::remove_dir_all(&tmp);
    }

    /// Stub backend used solely by the v0.8.5 #86 unit tests above —
    /// the `S4Service` constructor needs `B: S3` but the tests only
    /// exercise builder / accessor shape, never a handler call. Every
    /// `S3` method falls through to the trait's default
    /// `NotImplemented` (which `s3s` provides automatically).
    struct NoopBackend;

    #[async_trait::async_trait]
    impl S3 for NoopBackend {}

    /// v0.8.5 #81 (audit H-7): the panic-catch wrapper at the
    /// dispatcher spawn site must intercept a panicking inner future,
    /// log at ERROR, and bump the per-kind counter — instead of letting
    /// the panic propagate as a `JoinError` that no operator dashboard
    /// scrapes. We exercise the wrapper directly (rather than driving a
    /// full `spawn_replication_if_matched` end-to-end, which would
    /// require a full `S4Service` + backend) because the wrapper shape
    /// is the load-bearing piece — any inner-future swap would still
    /// route through the same `AssertUnwindSafe(...).catch_unwind()`
    /// closure we want to lock in here.
    #[tokio::test]
    async fn dispatcher_panic_caught_and_metric_bumped() {
        use futures::FutureExt as _;

        let handle = crate::metrics::test_metrics_handle();
        let kind = "replication";

        // Mirror the production wrapper shape verbatim — if the
        // production code ever stops using `AssertUnwindSafe.catch_unwind`
        // this test shouldn't keep passing on a hand-rolled copy that
        // diverged.
        let panicking = async {
            panic!("simulated dispatcher panic");
        };
        let result = std::panic::AssertUnwindSafe(panicking).catch_unwind().await;
        assert!(
            result.is_err(),
            "catch_unwind must surface the panic instead of swallowing it"
        );
        // Bump the production counter via the same helper the wrapper
        // calls so the rendered output gates on the production code
        // path, not a parallel bookkeeping copy.
        crate::metrics::record_dispatcher_panic(kind);

        let rendered = handle.render();
        assert!(
            rendered.contains("s4_dispatcher_panics_total"),
            "expected s4_dispatcher_panics_total in metrics output, got: {rendered}"
        );
        assert!(
            rendered.contains("kind=\"replication\""),
            "expected kind=\"replication\" label in metrics output, got: {rendered}"
        );
    }
}