// s4_server/service.rs
1//! `s3s::S3` 実装 — `s3s_aws::Proxy` への delegation を default にしつつ、
2//! `put_object` / `get_object` 経路で `s4_codec::CodecRegistry` を呼ぶ。
3//!
4//! ## カバー範囲 (Phase 1 月 2)
5//!
6//! - 圧縮 hook あり: `put_object`, `get_object`
7//! - 純 delegation (圧縮なし): `head_bucket`, `list_buckets`, `create_bucket`, `delete_bucket`,
8//!   `head_object`, `delete_object`, `delete_objects`, `copy_object`, `list_objects`,
9//!   `list_objects_v2`, `create_multipart_upload`, `upload_part`,
10//!   `complete_multipart_upload`, `abort_multipart_upload`, `list_multipart_uploads`,
11//!   `list_parts`
12//! - 未対応 (デフォルトで NotImplemented): その他 80+ ops (Tagging / ACL / Lifecycle 等は Phase 2)
13//!
14//! ## アーキテクチャ
15//!
16//! - `S4Service<B>` は backend (B: S3) と `Arc<CodecRegistry>` と `Arc<dyn CodecDispatcher>`
17//!   を保持する。`CodecRegistry` 経由で複数 codec を抱えられるので、ひとつの S4 インスタンスが
18//!   複数 codec で書かれた object を透過的に GET できる
19//! - PUT: dispatcher が body の先頭 sample から codec を選び、registry で compress、
20//!   manifest を S3 metadata に書いて backend に forward
21//! - GET: backend から取得 → metadata から manifest を復元 → registry.decompress で
22//!   manifest 指定の codec で解凍 → 元の bytes を return
23//!
24//! ## 既知の制限事項
25//!
26//! - **Multipart Upload は per-part 圧縮が未実装**: 現状は upload_part を素通し。
27//!   Phase 1 月 2 後半で per-part compress + complete_multipart_upload で manifest 集約。
28//! - **PUT body は memory に collect**: max_body_bytes 上限あり (default 5 GiB = S3 単発 PUT 上限)。
29//!   Streaming-aware 圧縮は Phase 2。
30
31use std::sync::Arc;
32
33use base64::Engine as _;
34use bytes::BytesMut;
35use s3s::dto::*;
36use s3s::{S3, S3Error, S3ErrorCode, S3Request, S3Response, S3Result};
37use s4_codec::index::{FrameIndex, build_index_from_body, decode_index, encode_index, sidecar_key};
38use s4_codec::multipart::{
39    FRAME_HEADER_BYTES, FrameHeader, FrameIter, S3_MULTIPART_MIN_PART_BYTES, pad_to_minimum,
40    write_frame,
41};
42use s4_codec::{ChunkManifest, CodecDispatcher, CodecKind, CodecRegistry, CompressTelemetry};
43use std::time::Instant;
44use tracing::{debug, info};
45
46use crate::blob::{
47    bytes_to_blob, chain_sample_with_rest, collect_blob, collect_with_sample, peek_sample,
48};
49use crate::streaming::{
50    Crc32cVerifyingReader, async_read_to_blob, blob_to_async_read, cpu_zstd_decompress_stream,
51    pick_chunk_size, streaming_compress_to_frames, supports_streaming_compress,
52    supports_streaming_decompress,
53};
54
/// Maximum number of bytes from the head of a PUT body that is handed to
/// the dispatcher when sampling to pick a codec (4 KiB).
const SAMPLE_BYTES: usize = 4 * 1024;
57
58/// v0.8 #55: stamp the GPU pipeline metrics (`s4_gpu_compress_seconds`,
59/// `s4_gpu_throughput_bytes_per_sec`, `s4_gpu_oom_total`) from a
60/// `CompressTelemetry` returned by `CodecRegistry::compress_with_telemetry`.
61/// CPU codecs (`gpu_seconds = None`) are no-ops here — they're already
62/// covered by the existing `s4_request_latency_seconds` / `s4_bytes_*`
63/// counters in the request-level `record_put` / `record_get` calls.
64#[inline]
65fn stamp_gpu_compress_telemetry(tel: &CompressTelemetry) {
66    if let Some(secs) = tel.gpu_seconds {
67        crate::metrics::record_gpu_compress(tel.codec, secs, tel.bytes_in, tel.bytes_out);
68    }
69    if tel.oom {
70        crate::metrics::record_gpu_oom(tel.codec);
71    }
72}
73
74/// v0.7 #49: percent-encoding set covering everything that is **not** an
75/// `unreserved` character per RFC 3986 §2.3, **plus** we additionally
76/// encode the path-reserved sub-delims that `http::Uri` rejects in a
77/// path segment (`?`, `#`, `%`, control bytes, space, etc.). We
78/// deliberately keep `/` un-encoded because S3 keys legally use `/` as
79/// a logical separator and the rest of the synthetic URI relies on the
80/// path layout `/{bucket}/{key}` round-tripping byte-for-byte.
81const URI_KEY_ENCODE_SET: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS
82    .add(b' ')
83    .add(b'"')
84    .add(b'#')
85    .add(b'<')
86    .add(b'>')
87    .add(b'?')
88    .add(b'`')
89    .add(b'{')
90    .add(b'}')
91    .add(b'|')
92    .add(b'\\')
93    .add(b'^')
94    .add(b'[')
95    .add(b']')
96    .add(b'%');
97
98/// v0.7 #49: build the synthetic `/{bucket}/{key}` request URI used by
99/// the sidecar / replication helpers when they re-enter the backend
100/// trait without going through the HTTP layer. S3 object keys can
101/// contain spaces, control bytes, and arbitrary Unicode that would
102/// make `format!(...).parse::<http::Uri>()` panic; we percent-encode
103/// the key bytes (RFC 3986 path segment) and the bucket name (defensive
104/// — bucket names are normally DNS-safe, but the helper is the single
105/// choke-point) before splicing them in. If the encoded form *still*
106/// fails to parse (extremely unlikely once everything outside the
107/// unreserved set is escaped) we surface a typed `400 InvalidObjectName`
108/// instead of crashing the worker.
109pub(crate) fn safe_object_uri(bucket: &str, key: &str) -> S3Result<http::Uri> {
110    use percent_encoding::utf8_percent_encode;
111    let bucket_enc = utf8_percent_encode(bucket, URI_KEY_ENCODE_SET);
112    let key_enc = utf8_percent_encode(key, URI_KEY_ENCODE_SET);
113    let raw = format!("/{bucket_enc}/{key_enc}");
114    raw.parse::<http::Uri>().map_err(|e| {
115        // S3 spec uses `InvalidObjectName` (HTTP 400) for keys that
116        // can't be represented in a request URI. The generated
117        // `S3ErrorCode` enum doesn't expose a typed variant for it,
118        // so we round-trip through `from_bytes` which preserves the
119        // canonical wire string while falling back to InvalidArgument
120        // if even that lookup fails (cannot happen at runtime — kept
121        // as a belt-and-suspenders branch so this helper never
122        // panics).
123        let code =
124            S3ErrorCode::from_bytes(b"InvalidObjectName").unwrap_or(S3ErrorCode::InvalidArgument);
125        S3Error::with_message(
126            code,
127            format!("object key cannot be encoded as a request URI: {e}"),
128        )
129    })
130}
131
/// v0.4 #20: request context captured at handler entry — before the
/// backend call consumes the request — so that the matching
/// `record_access` at end-of-request can fill in the structured
/// access-log entry.
struct AccessLogPreamble {
    request_uri: String,
    remote_ip: Option<String>,
    requester: Option<String>,
    user_agent: Option<String>,
}
141
142pub struct S4Service<B: S3> {
143    /// Wrapped in `Arc` so the v0.6 #40 cross-bucket replication
144    /// dispatcher can clone it into a detached `tokio::spawn` task
145    /// (Arc::clone is cheap; backend trait methods take `&self` so no
146    /// other handler is affected by the indirection).
147    backend: Arc<B>,
148    registry: Arc<CodecRegistry>,
149    dispatcher: Arc<dyn CodecDispatcher>,
150    max_body_bytes: usize,
151    policy: Option<crate::policy::SharedPolicy>,
152    /// v0.3 #13: surfaced as the `aws:SecureTransport` Condition key. Set
153    /// to `true` when the listener is wrapped in TLS (or ACME), so policies
154    /// gating "deny if not over TLS" can do their job. Defaults to `false`
155    /// (HTTP); set via [`S4Service::with_secure_transport`] at boot.
156    secure_transport: bool,
157    /// v0.4 #19: optional per-(principal, bucket) token-bucket limiter.
158    rate_limits: Option<crate::rate_limit::SharedRateLimits>,
159    /// v0.4 #20: optional S3-style access log emitter.
160    access_log: Option<crate::access_log::SharedAccessLog>,
161    /// v0.4 #21 / v0.5 #29: optional server-side encryption keyring
162    /// (AES-256-GCM). When set, every PUT body gets wrapped in S4E2
163    /// (with the keyring's active key id) after the compress + framing
164    /// steps; every GET that sniffs as S4E1/S4E2 is decrypted before
165    /// frame parsing. A `with_sse_key(...)` call wraps the supplied
166    /// key in a 1-slot keyring so single-key (v0.4) operators get the
167    /// same behaviour they had before, just on the v2 frame.
168    sse_keyring: Option<crate::sse::SharedSseKeyring>,
169    /// v0.5 #34: optional first-class versioning state machine. When
170    /// `Some(...)`, S4-server itself owns the per-bucket versioning
171    /// state + per-(bucket, key) version chain; PUT / GET / DELETE /
172    /// list_object_versions / get_bucket_versioning /
173    /// put_bucket_versioning handlers consult the manager instead of
174    /// passing through. When `None` (default), the legacy
175    /// backend-passthrough behaviour applies so existing v0.4
176    /// deployments are unaffected until they explicitly call
177    /// `with_versioning(...)`.
178    versioning: Option<Arc<crate::versioning::VersioningManager>>,
179    /// v0.5 #28: optional SSE-KMS envelope-encryption backend. When
180    /// `Some(...)`, PUTs carrying `x-amz-server-side-encryption: aws:kms`
181    /// generate a fresh DEK via the backend, encrypt the body with it
182    /// (S4E4 frame), and persist only the wrapped DEK. GETs sniffing as
183    /// S4E4 unwrap the DEK through the same backend before decrypt.
184    /// `kms_default_key_id` is used when the request omits an explicit
185    /// `x-amz-server-side-encryption-aws-kms-key-id` (mirrors AWS S3
186    /// bucket-default behaviour).
187    kms: Option<Arc<dyn crate::kms::KmsBackend>>,
188    kms_default_key_id: Option<String>,
189    /// v0.5 #30: optional Object Lock (WORM) enforcement layer. When
190    /// `Some(...)`, `delete_object` and overwrite-style `put_object`
191    /// consult the manager and refuse the operation with HTTP 403
192    /// `AccessDenied` while the object is locked (Compliance until
193    /// expiry, Governance unless the bypass header is set, or any time
194    /// a legal hold is on). PUT also auto-applies the bucket-default
195    /// retention to brand-new objects when configured. When `None`
196    /// (default), the legacy backend-passthrough behaviour applies, so
197    /// existing v0.4 deployments are unaffected until they explicitly
198    /// call `with_object_lock(...)`.
199    object_lock: Option<Arc<crate::object_lock::ObjectLockManager>>,
200    /// v0.6 #38: optional first-class CORS bucket configuration manager.
201    /// When `Some(...)`, S4-server itself owns per-bucket CORS rules and
202    /// `put_bucket_cors` / `get_bucket_cors` / `delete_bucket_cors`
203    /// consult the manager instead of passing through to the backend.
204    /// `handle_preflight` (public method on `S4Service`) routes OPTIONS-
205    /// style preflight matching through the same store; the actual HTTP
206    /// OPTIONS routing wire-up at the listener level is a follow-up
207    /// (s3s framework does not surface OPTIONS as a typed handler).
208    cors: Option<Arc<crate::cors::CorsManager>>,
209    /// v0.6 #36: optional first-class S3 Inventory manager. When
210    /// `Some(...)`, S4-server itself owns per-(bucket, id) inventory
211    /// configurations and `put_bucket_inventory_configuration` /
212    /// `get_bucket_inventory_configuration` /
213    /// `list_bucket_inventory_configurations` /
214    /// `delete_bucket_inventory_configuration` consult the manager
215    /// instead of passing through to the backend. The actual periodic
216    /// CSV emission is driven by a tokio task in `main.rs` that calls
217    /// `InventoryManager::run_once_for_test` on a fixed cadence; the
218    /// service handlers below only deal with config-level CRUD.
219    inventory: Option<Arc<crate::inventory::InventoryManager>>,
220    /// v0.6 #35: optional first-class S3 bucket-notification manager.
221    /// When `Some(...)`, S4-server itself owns per-bucket notification
222    /// configurations and `put_bucket_notification_configuration` /
223    /// `get_bucket_notification_configuration` consult the manager
224    /// instead of passing through to the backend. Successful PUT /
225    /// DELETE handlers fire matching destinations on a detached tokio
226    /// task (best-effort; see `crate::notifications::dispatch_event`).
227    notifications: Option<Arc<crate::notifications::NotificationManager>>,
228    /// v0.6 #37: optional first-class S3 Lifecycle configuration
229    /// manager. When `Some(...)`, S4-server itself owns per-bucket
230    /// lifecycle rules and `put_bucket_lifecycle_configuration` /
231    /// `get_bucket_lifecycle_configuration` /
232    /// `delete_bucket_lifecycle` consult the manager instead of
233    /// passing through to the backend. The actual background scanner
234    /// (list_objects_v2 -> evaluate -> delete / metadata-rewrite per
235    /// rule) is a v0.7+ follow-up; the test path
236    /// `S4Service::run_lifecycle_once_for_test` exercises the
237    /// evaluator end-to-end so this v0.6 #37 wiring is enough to ship
238    /// the configuration-management half without putting a
239    /// half-wired bucket-walk in front of users.
240    lifecycle: Option<Arc<crate::lifecycle::LifecycleManager>>,
241    /// v0.6 #39: optional first-class object + bucket Tagging manager.
242    /// When `Some(...)`, S4-server itself owns per-(bucket, key) and
243    /// per-bucket tag state — `PutObjectTagging` /
244    /// `GetObjectTagging` / `DeleteObjectTagging` /
245    /// `PutBucketTagging` / `GetBucketTagging` /
246    /// `DeleteBucketTagging` route through the manager (replacing the
247    /// previous backend-passthrough behaviour). `put_object` also
248    /// pre-parses the `x-amz-tagging` header / `Tagging` input field
249    /// so the IAM policy evaluator can gate on
250    /// `s3:RequestObjectTag/<key>` and `s3:ExistingObjectTag/<key>`.
251    /// On a successful PUT the parsed tags are persisted; on a
252    /// successful DELETE the matching tag entry is dropped.
253    tagging: Option<Arc<crate::tagging::TagManager>>,
254    /// v0.6 #40: optional first-class cross-bucket replication manager.
255    /// When `Some(...)`, S4-server itself owns per-bucket replication
256    /// rules; `PutBucketReplication` / `GetBucketReplication` /
257    /// `DeleteBucketReplication` route through the manager (replacing
258    /// the previous backend-passthrough behaviour). On every successful
259    /// `put_object` the manager's rule list is consulted; the
260    /// highest-priority matching enabled rule wins, the per-key status
261    /// is recorded as `Pending`, and the source body and metadata are
262    /// handed to a detached tokio task that PUTs to the destination
263    /// bucket through the same backend. The replica is stamped with
264    /// `x-amz-replication-status: REPLICA` in its metadata; the
265    /// source-side status is updated to `Completed` on success or
266    /// `Failed` after the 3-attempt retry budget is exhausted (drop
267    /// counter bumps in either-side case so dashboards see the loss).
268    /// `head_object` / `get_object` echo the recorded status back as
269    /// `x-amz-replication-status` so consumers can poll progress.
270    /// Limited to single-instance (same `S4Service`) replication; true
271    /// cross-region (multi-instance) is a v0.7+ follow-up.
272    replication: Option<Arc<crate::replication::ReplicationManager>>,
273    /// v0.6 #42: optional MFA-Delete enforcement layer. When `Some(...)`,
274    /// every DELETE / DELETE-version / delete-marker / `PutBucketVersioning`
275    /// request against a bucket whose MFA-Delete state is `Enabled`
276    /// must carry `x-amz-mfa: <serial> <code>` (RFC 6238 6-digit TOTP);
277    /// missing or invalid tokens return HTTP 403 `AccessDenied`. When
278    /// `None` (default), the gate is a no-op so existing v0.4 / v0.5
279    /// deployments are unaffected until they explicitly call
280    /// `with_mfa_delete(...)`.
281    mfa_delete: Option<Arc<crate::mfa::MfaDeleteManager>>,
282    /// v0.5 #32: when `true`, every PUT must carry an SSE indicator
283    /// (`x-amz-server-side-encryption`, the SSE-C customer-key headers,
284    /// or be matched against a configured server-managed keyring/KMS).
285    /// Set by `--compliance-mode strict` after the boot-time
286    /// prerequisite check passes.
287    compliance_strict: bool,
288    /// v0.7 #47: optional SigV4a (asymmetric ECDSA-P256-SHA256) verify
289    /// gate. When `Some(...)`, the listener-side middleware (see
290    /// [`crate::routing::try_sigv4a_verify`]) inspects every incoming
291    /// request and short-circuits SigV4a-signed ones — verifying the
292    /// signature against the credential store and returning 403
293    /// `SignatureDoesNotMatch` / `InvalidAccessKeyId` on failure. Plain
294    /// SigV4 (HMAC-SHA256) requests pass through to s3s untouched. When
295    /// `None`, the middleware is a no-op so the existing SigV4 path is
296    /// unaffected (operators opt in via `--sigv4a-credentials <DIR>`).
297    sigv4a_gate: Option<Arc<SigV4aGate>>,
298    /// v0.8 #54 BUG-5..10: per-`upload_id` side-table that ferries the
299    /// SSE / Tagging / Object-Lock context captured at
300    /// `CreateMultipartUpload` time through to `UploadPart` /
301    /// `CompleteMultipartUpload`. Always-on (no `with_*` flag) — the
302    /// store is gateway-internal and idle when no multipart is in
303    /// flight. See [`crate::multipart_state`] for rationale.
304    multipart_state: Arc<crate::multipart_state::MultipartStateStore>,
305    /// v0.8 #52: plaintext bytes per S4E5 chunk on the SSE-S4 PUT
306    /// path. `0` (default) → use the legacy buffered S4E2 path
307    /// (whole-body AES-GCM tag, GET buffers + verifies before
308    /// emitting). Non-zero → use the chunked S4E5 frame so GET can
309    /// stream-decrypt chunk-by-chunk. Wired by `--sse-chunk-size`
310    /// in `main.rs`. SSE-C and SSE-KMS are intentionally unaffected
311    /// (chunked variants tracked in a follow-up issue).
312    sse_chunk_size: usize,
313}
314
315impl<B: S3> S4Service<B> {
    /// API upper bound of a single (non-multipart) AWS S3 PUT: 5 GiB.
    pub const DEFAULT_MAX_BODY_BYTES: usize = 5 * 1024 * 1024 * 1024;
318
319    pub fn new(
320        backend: B,
321        registry: Arc<CodecRegistry>,
322        dispatcher: Arc<dyn CodecDispatcher>,
323    ) -> Self {
324        Self {
325            backend: Arc::new(backend),
326            registry,
327            dispatcher,
328            max_body_bytes: Self::DEFAULT_MAX_BODY_BYTES,
329            policy: None,
330            secure_transport: false,
331            rate_limits: None,
332            access_log: None,
333            sse_keyring: None,
334            versioning: None,
335            kms: None,
336            kms_default_key_id: None,
337            object_lock: None,
338            cors: None,
339            inventory: None,
340            notifications: None,
341            lifecycle: None,
342            tagging: None,
343            replication: None,
344            mfa_delete: None,
345            compliance_strict: false,
346            sigv4a_gate: None,
347            multipart_state: Arc::new(crate::multipart_state::MultipartStateStore::new()),
348            // v0.8 #52: chunked SSE-S4 disabled by default — opt
349            // in via `S4Service::with_sse_chunk_size(...)` /
350            // `--sse-chunk-size <BYTES>`. Default keeps the legacy
351            // S4E2 buffered path so existing deployments are
352            // bit-for-bit unchanged.
353            sse_chunk_size: 0,
354        }
355    }
356
357    /// v0.7 #47: attach the SigV4a verify gate. Once set, the
358    /// listener-side middleware (`crate::routing::try_sigv4a_verify`)
359    /// short-circuits any incoming `AWS4-ECDSA-P256-SHA256` request,
360    /// verifying it against the supplied credential store and
361    /// returning 403 on failure. Plain SigV4 (HMAC-SHA256) requests
362    /// are unaffected. When the gate is unset (default), the
363    /// middleware skips entirely so existing SigV4 deployments keep
364    /// working.
365    #[must_use]
366    pub fn with_sigv4a_gate(mut self, gate: Arc<SigV4aGate>) -> Self {
367        self.sigv4a_gate = Some(gate);
368        self
369    }
370
    /// v0.7 #47: borrow the attached SigV4a gate, if any. `main.rs` uses
    /// this to snapshot the gate `Arc` before the s3s `ServiceBuilder`
    /// consumes the `S4Service`: the listener-side middleware needs the
    /// same `Arc` because s3s' SigV4 verifier rejects SigV4a algorithm
    /// tokens with "unknown algorithm", so the match has to happen at
    /// the hyper layer instead.
    #[must_use]
    pub fn sigv4a_gate(&self) -> Option<&Arc<SigV4aGate>> {
        self.sigv4a_gate.as_ref()
    }
381
    /// v0.8.2 #62: borrow the multipart state store so `main.rs` can
    /// snapshot the `Arc` before the s3s `ServiceBuilder` consumes the
    /// `S4Service`. The background `sweep_stale` task in `main.rs`
    /// holds this `Arc` and ticks once an hour to drop abandoned
    /// upload contexts (and their `Zeroizing<[u8; 32]>` SSE-C keys).
    #[must_use]
    pub fn multipart_state(&self) -> &Arc<crate::multipart_state::MultipartStateStore> {
        &self.multipart_state
    }
391
392    /// v0.6 #39: attach the in-memory object + bucket Tagging manager.
393    /// Once set, `Put/Get/Delete` `Object/Bucket Tagging` route
394    /// through the manager (instead of forwarding to the backend),
395    /// and `put_object`'s `x-amz-tagging` parse path becomes the
396    /// source of `s3:RequestObjectTag/<key>` for the IAM policy
397    /// evaluator. The manager itself is shared via `Arc`.
398    #[must_use]
399    pub fn with_tagging(mut self, mgr: Arc<crate::tagging::TagManager>) -> Self {
400        self.tagging = Some(mgr);
401        self
402    }
403
    /// v0.6 #39: borrow the attached tagging manager, if any (test /
    /// introspection — the snapshotter in `main.rs`, when wired, keeps
    /// its own `Arc` clone rather than going through this accessor).
    #[must_use]
    pub fn tag_manager(&self) -> Option<&Arc<crate::tagging::TagManager>> {
        self.tagging.as_ref()
    }
411
412    /// v0.6 #36: attach the in-memory S3 Inventory manager. Once set,
413    /// `put_bucket_inventory_configuration` /
414    /// `get_bucket_inventory_configuration` /
415    /// `list_bucket_inventory_configurations` /
416    /// `delete_bucket_inventory_configuration` route through the
417    /// manager. The actual periodic CSV / manifest emission is
418    /// orchestrated by a tokio task started in `main.rs`; the manager
419    /// itself is shared between the handler and the scheduler via
420    /// `Arc`.
421    #[must_use]
422    pub fn with_inventory(mut self, mgr: Arc<crate::inventory::InventoryManager>) -> Self {
423        self.inventory = Some(mgr);
424        self
425    }
426
    /// v0.6 #36: borrow the attached inventory manager, if any (test /
    /// introspection — the background scheduler in `main.rs` keeps its
    /// own `Arc` clone, so this accessor is for the test path that
    /// invokes `run_once_for_test` directly).
    #[must_use]
    pub fn inventory_manager(&self) -> Option<&Arc<crate::inventory::InventoryManager>> {
        self.inventory.as_ref()
    }
435
436    /// v0.6 #37: attach the in-memory S3 Lifecycle configuration
437    /// manager. Once set, `put_bucket_lifecycle_configuration` /
438    /// `get_bucket_lifecycle_configuration` / `delete_bucket_lifecycle`
439    /// route through the manager (replacing the previous backend-
440    /// passthrough behaviour). The actual periodic scanner that walks
441    /// the source bucket and invokes Expiration / Transition /
442    /// NoncurrentExpiration actions is a v0.7+ follow-up — see
443    /// [`Self::run_lifecycle_once_for_test`] for the in-memory test
444    /// path that exercises the evaluator end-to-end.
445    #[must_use]
446    pub fn with_lifecycle(mut self, mgr: Arc<crate::lifecycle::LifecycleManager>) -> Self {
447        self.lifecycle = Some(mgr);
448        self
449    }
450
    /// v0.6 #37: borrow the attached lifecycle manager, if any (test /
    /// introspection — the background scheduler in `main.rs` keeps its
    /// own `Arc` clone, so this accessor is for the test path that
    /// invokes the evaluator directly).
    #[must_use]
    pub fn lifecycle_manager(&self) -> Option<&Arc<crate::lifecycle::LifecycleManager>> {
        self.lifecycle.as_ref()
    }
459
460    /// v0.6 #37: synchronous test entry that runs the lifecycle evaluator
461    /// against a caller-provided list of `(key, age, size, tags)` tuples
462    /// and returns the `(key, action)` pairs that should fire. The actual
463    /// backend invocation (S3.delete_object / metadata rewrite) is left
464    /// to the caller — the unit + E2E tests use this to verify the
465    /// evaluator without spawning the (deferred) background scanner.
466    /// Returns an empty `Vec` when no lifecycle manager is attached or
467    /// no rule matches.
468    #[must_use]
469    pub fn run_lifecycle_once_for_test(
470        &self,
471        bucket: &str,
472        objects: &[crate::lifecycle::EvaluateBatchEntry],
473    ) -> Vec<(String, crate::lifecycle::LifecycleAction)> {
474        let Some(mgr) = self.lifecycle.as_ref() else {
475            return Vec::new();
476        };
477        crate::lifecycle::evaluate_batch(mgr, bucket, objects)
478    }
479
480    /// v0.6 #35: attach the in-memory bucket-notification manager. Once
481    /// set, `put_bucket_notification_configuration` /
482    /// `get_bucket_notification_configuration` route through the manager
483    /// (replacing the previous backend-passthrough behaviour); successful
484    /// `put_object` / `delete_object` calls fire matching destinations
485    /// on a detached tokio task via
486    /// `crate::notifications::dispatch_event` (best-effort, fire-and-
487    /// forget — failures bump the manager's `dropped_total` counter and
488    /// log at warn but do NOT fail the originating S3 request).
489    #[must_use]
490    pub fn with_notifications(
491        mut self,
492        mgr: Arc<crate::notifications::NotificationManager>,
493    ) -> Self {
494        self.notifications = Some(mgr);
495        self
496    }
497
    /// v0.6 #35: borrow the attached notifications manager, if any
    /// (test / introspection — used by the metrics layer to read
    /// `dropped_total`).
    #[must_use]
    pub fn notifications_manager(&self) -> Option<&Arc<crate::notifications::NotificationManager>> {
        self.notifications.as_ref()
    }
505
506    /// v0.6 #35: internal helper used by the DELETE handlers to fire a
507    /// matching notification on a detached tokio task. No-op when no
508    /// manager is attached or no rule on the bucket matches the given
509    /// (event, key) tuple.
510    fn fire_delete_notification(
511        &self,
512        bucket: &str,
513        key: &str,
514        event: crate::notifications::EventType,
515        version_id: Option<String>,
516    ) {
517        let Some(mgr) = self.notifications.as_ref() else {
518            return;
519        };
520        let dests = mgr.match_destinations(bucket, &event, key);
521        if dests.is_empty() {
522            return;
523        }
524        tokio::spawn(crate::notifications::dispatch_event(
525            Arc::clone(mgr),
526            bucket.to_owned(),
527            key.to_owned(),
528            event,
529            None,
530            None,
531            version_id,
532            format!("S4-{}", uuid::Uuid::new_v4()),
533        ));
534    }
535
536    /// v0.6 #40: attach the in-memory cross-bucket replication manager.
537    /// Once set, `put_bucket_replication` / `get_bucket_replication` /
538    /// `delete_bucket_replication` route through the manager (replacing
539    /// the previous backend-passthrough behaviour); a successful
540    /// `put_object` whose key matches an enabled rule fires a detached
541    /// tokio task that PUTs the same body + metadata to the rule's
542    /// destination bucket, stamping the replica with
543    /// `x-amz-replication-status: REPLICA`. Failures after the retry
544    /// budget bump the manager's `dropped_total` counter and are
545    /// surfaced in the `s4_replication_dropped_total` Prometheus
546    /// counter; successes bump `s4_replication_replicated_total`.
547    #[must_use]
548    pub fn with_replication(mut self, mgr: Arc<crate::replication::ReplicationManager>) -> Self {
549        self.replication = Some(mgr);
550        self
551    }
552
    /// v0.6 #40: borrow the attached replication manager, if any
    /// (test / introspection — used by the metrics layer to read
    /// `dropped_total`).
    #[must_use]
    pub fn replication_manager(&self) -> Option<&Arc<crate::replication::ReplicationManager>> {
        self.replication.as_ref()
    }
560
561    /// v0.6 #40: internal helper used by the PUT handlers to fire a
562    /// detached cross-bucket replication task. No-op when no manager
563    /// is attached, the source backend PUT failed, or no rule on the
564    /// source bucket matches the (key, tags) tuple. The `body` is the
565    /// post-compression / post-encryption `Bytes` that was sent to
566    /// the source backend (refcount-cloned), and `metadata` is the
567    /// metadata map that already includes the manifest /
568    /// `s4-encrypted` markers — the replica decodes through the same
569    /// path. The destination PUT runs through `Arc<B>::put_object`.
570    ///
571    /// ## v0.8.2 #61: generation token + shadow-key destination
572    ///
573    /// `pending_version` is the source-side `PutOutcome` minted by the
574    /// caller's versioning branch (or `None` for unversioned /
575    /// suspended buckets). When `pending_version.versioned_response`
576    /// is `true`, the dispatcher writes the destination under the same
577    /// shadow path the source uses (`<key>.__s4ver__/<vid>`) so the
578    /// destination's version chain receives the new version the same
579    /// way `?versionId=` GET resolves it. Closes audit C-1.
580    ///
581    /// The dispatcher also mints a fresh `generation` token before
582    /// spawning, threaded through to [`crate::replication::
583    /// replicate_object`]. Closes audit C-3 — a stale retry of an
584    /// older PUT can no longer overwrite the destination's newer bytes
585    /// because the CAS guard sees the higher stored generation and
586    /// drops its destination write.
587    ///
588    /// ## Asymmetric versioning policy (out of scope)
589    ///
590    /// We assume source + destination buckets share the same
591    /// versioning policy (both Enabled or both Suspended /
592    /// Unversioned). Cross-bucket policy queries would require a
593    /// backend round-trip per replication, which is not worth it for
594    /// the single-instance scope. Operators who configure asymmetric
595    /// versioning will see destination-side `?versionId=` lookups
596    /// miss — documented as out-of-scope until a future per-rule
597    /// `destination_versioning_policy` knob lands.
598    // 8 args is the post-#61 shape: replication needs the
599    // source bucket+key, the canonical tag set for rule-matching,
600    // the post-codec body+metadata for the destination PUT, the
601    // backend-success gate, and the pending version-id for the
602    // shadow-key destination override. A shape struct would just
603    // split the (single) call site so opt for the inline form.
    #[allow(clippy::too_many_arguments)]
    fn spawn_replication_if_matched(
        &self,
        source_bucket: &str,
        source_key: &str,
        request_tags: &Option<crate::tagging::TagSet>,
        body: &bytes::Bytes,
        metadata: &Option<std::collections::HashMap<String, String>>,
        backend_ok: bool,
        pending_version: Option<&crate::versioning::PutOutcome>,
    ) where
        B: Send + Sync + 'static,
    {
        // Never replicate a PUT the backend refused — there is no source
        // object to mirror.
        if !backend_ok {
            return;
        }
        // No replication manager attached → replication is disabled.
        let Some(mgr) = self.replication.as_ref() else {
            return;
        };
        // Pull the request's tags into the (k, v) shape the matcher
        // expects. The tagging manager would have the canonical
        // post-PUT view but at this point in the pipeline it's
        // already been written above; for the rule-match decision
        // the request's tags are sufficient (= the tags this PUT
        // applies, S3 PutObject is full-replace on tags).
        let object_tags: Vec<(String, String)> = request_tags
            .as_ref()
            .map(|ts| ts.iter().cloned().collect())
            .unwrap_or_default();
        let Some(rule) = mgr.match_rule(source_bucket, source_key, &object_tags) else {
            return;
        };
        // v0.8.2 #61: mint the per-PUT generation BEFORE the eager
        // Pending stamp so the stamp itself carries the right
        // generation (the CAS in `record_status_if_newer` would
        // otherwise see a `generation=0` Pending and accept any
        // stale retry).
        let generation = mgr.next_generation();
        // Eagerly mark the source key as Pending so a HEAD between
        // the source PUT returning and the spawned task completing
        // surfaces the in-flight state. CAS-guarded so a slower
        // older PUT can't downgrade a newer Completed back to Pending.
        // Result deliberately discarded: a refused CAS just means a
        // newer status is already recorded, which is fine.
        let _ = mgr.record_status_if_newer(
            source_bucket,
            source_key,
            generation,
            crate::replication::ReplicationStatus::Pending,
        );
        // v0.8.2 #61: derive the destination storage key. For a
        // versioning-Enabled source the destination receives the
        // same shadow-key path so a `?versionId=<vid>` GET on the
        // destination resolves through the same lookup the source
        // uses. Suspended / Unversioned sources keep the logical
        // key (= `None` override = dispatcher uses `source_key`).
        let destination_key_override = pending_version
            .filter(|pv| pv.versioned_response)
            .map(|pv| versioned_shadow_key(source_key, &pv.version_id));
        // v0.8.3 #68 (audit M-1): capture the source object's Object
        // Lock state so the dispatcher can decorate the destination
        // PUT with the matching AWS-wire lock headers. Without this,
        // a Compliance / Governance / legal-hold protected source
        // would replicate to a destination where DELETE succeeds
        // (the WORM posture would only hold on the source).
        let source_lock_state = self
            .object_lock
            .as_ref()
            .and_then(|mgr| mgr.get(source_bucket, source_key));
        // v0.8.3 #68: hand the destination-side ObjectLockManager to
        // the dispatcher closure so we can persist the propagated
        // lock state on successful destination PUT (the destination
        // PUT below bypasses S4Service::put_object — we drive the
        // backend directly — so the explicit_lock_mode commit block
        // in put_object never fires for replicas. We replay it here
        // against the destination key.)
        let dest_lock_mgr = self.object_lock.as_ref().map(Arc::clone);
        // Owned clones for the 'static spawned task (the borrowed
        // parameters cannot cross the tokio::spawn boundary).
        let mgr_cl = Arc::clone(mgr);
        let backend = Arc::clone(&self.backend);
        let body_cl = body.clone();
        let metadata_cl = metadata.clone();
        let source_bucket_cl = source_bucket.to_owned();
        let source_key_cl = source_key.to_owned();
        let source_lock_state_for_closure = source_lock_state.clone();
        let source_bucket_for_warn = source_bucket.to_owned();
        // Fire-and-forget: the JoinHandle is dropped, so the request path
        // never awaits replication completion; follow-up status
        // bookkeeping happens inside `replicate_object` (which receives
        // `mgr_cl` + `generation`).
        tokio::spawn(async move {
            let do_put = move |dest_bucket: String,
                               dest_key: String,
                               dest_body: bytes::Bytes,
                               dest_meta: Option<std::collections::HashMap<String, String>>| {
                let backend = Arc::clone(&backend);
                let dest_lock_mgr = dest_lock_mgr.clone();
                let lock_state = source_lock_state_for_closure.clone();
                let warn_src = source_bucket_for_warn.clone();
                async move {
                    // Synthetic request: only input matters to the backend
                    // here; the wire fields are stub values.
                    let req = S3Request {
                        input: PutObjectInput {
                            bucket: dest_bucket.clone(),
                            key: dest_key.clone(),
                            body: Some(bytes_to_blob(dest_body)),
                            metadata: dest_meta,
                            ..Default::default()
                        },
                        method: http::Method::PUT,
                        uri: "/".parse().unwrap(),
                        headers: http::HeaderMap::new(),
                        extensions: http::Extensions::new(),
                        credentials: None,
                        region: None,
                        service: None,
                        trailing_headers: None,
                    };
                    let put_result = backend
                        .put_object(req)
                        .await
                        .map(|_| ())
                        .map_err(|e| format!("destination put_object: {e}"));
                    // v0.8.3 #68: on successful destination PUT,
                    // persist the propagated lock state into the
                    // destination's ObjectLockManager so a subsequent
                    // DELETE on the destination is refused. Three cases:
                    //   - PUT failed     → skip (no replica to protect)
                    //   - lock_state None → nothing to propagate
                    //   - dest manager None (operator misconfig)
                    //                     → log warn-once + bump skip metric
                    if put_result.is_ok()
                        && let Some(state) = lock_state
                    {
                        match dest_lock_mgr {
                            Some(ref mgr) => {
                                mgr.set(&dest_bucket, &dest_key, state);
                            }
                            None => {
                                crate::replication::warn_lock_propagation_skipped(
                                    &warn_src,
                                    &dest_bucket,
                                );
                            }
                        }
                    }
                    put_result
                }
            };
            crate::replication::replicate_object(
                rule,
                source_bucket_cl,
                source_key_cl,
                body_cl,
                metadata_cl,
                do_put,
                mgr_cl,
                generation,
                destination_key_override,
                source_lock_state,
            )
            .await;
        });
    }
760
761    /// v0.6 #42: attach the in-memory MFA-Delete enforcement manager.
762    /// Once set, every DELETE / DELETE-version / delete-marker /
763    /// `PutBucketVersioning` request against a bucket whose MFA-Delete
764    /// state is `Enabled` requires a valid `x-amz-mfa: <serial> <code>`
765    /// header (RFC 6238 6-digit TOTP); the gate is a no-op for buckets
766    /// where MFA-Delete is `Disabled` (S3 default).
767    #[must_use]
768    pub fn with_mfa_delete(mut self, mgr: Arc<crate::mfa::MfaDeleteManager>) -> Self {
769        self.mfa_delete = Some(mgr);
770        self
771    }
772
773    /// v0.6 #42: borrow the attached MFA-Delete manager (test /
774    /// introspection — used by the snapshot path in `main.rs` to call
775    /// `to_json` for restart-recoverable state).
776    #[must_use]
777    pub fn mfa_delete_manager(&self) -> Option<&Arc<crate::mfa::MfaDeleteManager>> {
778        self.mfa_delete.as_ref()
779    }
780
781    /// v0.6 #38: attach the in-memory CORS configuration manager. Once
782    /// set, `put_bucket_cors` / `get_bucket_cors` / `delete_bucket_cors`
783    /// route through the manager instead of forwarding to the backend,
784    /// and [`Self::handle_preflight`] becomes useful for the (future)
785    /// listener-side OPTIONS interceptor.
786    #[must_use]
787    pub fn with_cors(mut self, mgr: Arc<crate::cors::CorsManager>) -> Self {
788        self.cors = Some(mgr);
789        self
790    }
791
792    /// v0.6 #38: Borrow the attached CORS manager (test / introspection).
793    #[must_use]
794    pub fn cors_manager(&self) -> Option<&Arc<crate::cors::CorsManager>> {
795        self.cors.as_ref()
796    }
797
798    /// v0.6 #38: evaluate a CORS preflight request against the bucket's
799    /// configured rules and, if a rule matches, return the headers that
800    /// the (future) listener-side OPTIONS interceptor must put on the
801    /// 200 response: `Access-Control-Allow-Origin`, `Access-Control-
802    /// Allow-Methods`, `Access-Control-Allow-Headers`, optionally
803    /// `Access-Control-Max-Age` and `Access-Control-Expose-Headers`.
804    ///
805    /// Returns `None` when no manager is attached, no config is
806    /// registered for the bucket, or no rule matches the (origin,
807    /// method, headers) triple. The caller is responsible for turning
808    /// `None` into the appropriate 403 response.
809    ///
810    /// **Note:** the OPTIONS routing itself (i.e. wiring this method
811    /// into the hyper-util listener path) is a follow-up — s3s does not
812    /// surface OPTIONS as a typed S3 handler, so this method is
813    /// currently call-able only from inside other handlers and tests.
814    #[must_use]
815    pub fn handle_preflight(
816        &self,
817        bucket: &str,
818        origin: &str,
819        method: &str,
820        request_headers: &[String],
821    ) -> Option<std::collections::HashMap<String, String>> {
822        let mgr = self.cors.as_ref()?;
823        let rule = mgr.match_preflight(bucket, origin, method, request_headers)?;
824        let mut h = std::collections::HashMap::new();
825        // Echo the matched origin back. If the rule used "*" we still
826        // echo "*" (S3 spec — the spec does not require us to echo the
827        // *requesting* origin when the wildcard matched).
828        let allow_origin = if rule.allowed_origins.iter().any(|o| o == "*") {
829            "*".to_string()
830        } else {
831            origin.to_string()
832        };
833        h.insert("Access-Control-Allow-Origin".to_string(), allow_origin);
834        h.insert(
835            "Access-Control-Allow-Methods".to_string(),
836            rule.allowed_methods.join(", "),
837        );
838        if !rule.allowed_headers.is_empty() {
839            // For the Allow-Headers response, echo back the rule's
840            // pattern list verbatim (S3 echoes the configured list,
841            // including "*" if present). Browsers honour exact-match
842            // rules.
843            h.insert(
844                "Access-Control-Allow-Headers".to_string(),
845                rule.allowed_headers.join(", "),
846            );
847        }
848        if let Some(secs) = rule.max_age_seconds {
849            h.insert("Access-Control-Max-Age".to_string(), secs.to_string());
850        }
851        if !rule.expose_headers.is_empty() {
852            h.insert(
853                "Access-Control-Expose-Headers".to_string(),
854                rule.expose_headers.join(", "),
855            );
856        }
857        Some(h)
858    }
859
860    /// v0.5 #32: enable strict compliance mode. Every PUT must carry an
861    /// SSE indicator (server-side encryption header or SSE-C customer
862    /// key); requests without one are rejected with 400 InvalidRequest.
863    /// Boot-time prerequisite checking lives in the binary
864    /// (`validate_compliance_mode`) so this flag is purely the runtime
865    /// switch.
866    #[must_use]
867    pub fn with_compliance_strict(mut self, on: bool) -> Self {
868        self.compliance_strict = on;
869        self
870    }
871
872    /// v0.5 #30: attach the in-memory Object Lock (WORM) enforcement
873    /// manager. Once set, `delete_object` and overwrite-path
874    /// `put_object` refuse operations on locked keys with HTTP 403
875    /// `AccessDenied`; new PUTs to a bucket with a default retention
876    /// policy auto-create per-object lock state.
877    #[must_use]
878    pub fn with_object_lock(mut self, mgr: Arc<crate::object_lock::ObjectLockManager>) -> Self {
879        self.object_lock = Some(mgr);
880        self
881    }
882
883    /// v0.7 #45: borrow the attached Object Lock manager (read-only —
884    /// the lifecycle scanner uses this to skip currently-locked objects
885    /// before issuing `delete_object`, since an Object Lock always wins
886    /// over Lifecycle Expiration in AWS S3 semantics). Mirrors the
887    /// shape of [`Self::lifecycle_manager`] /
888    /// [`Self::tag_manager`] — purely additive accessor, no handler
889    /// behaviour change.
890    #[must_use]
891    pub fn object_lock_manager(&self) -> Option<&Arc<crate::object_lock::ObjectLockManager>> {
892        self.object_lock.as_ref()
893    }
894
895    /// v0.5 #28: attach an SSE-KMS backend. `default_key_id` is used
896    /// when a PUT requests SSE-KMS without naming a specific KMS key
897    /// (operators set this to mirror AWS S3's bucket-default key).
898    #[must_use]
899    pub fn with_kms_backend(
900        mut self,
901        kms: Arc<dyn crate::kms::KmsBackend>,
902        default_key_id: Option<String>,
903    ) -> Self {
904        self.kms = Some(kms);
905        self.kms_default_key_id = default_key_id;
906        self
907    }
908
909    /// v0.5 #34: attach the first-class versioning state machine. Once
910    /// set, this `S4Service` owns the per-bucket versioning state +
911    /// per-(bucket, key) version chain; `put_object` / `get_object` /
912    /// `delete_object` / `list_object_versions` /
913    /// `get_bucket_versioning` / `put_bucket_versioning` consult the
914    /// manager instead of passing through to the backend. The backend
915    /// is still used as the byte store: Suspended / Unversioned buckets
916    /// keep using `<key>` directly (legacy), Enabled buckets redirect
917    /// each version's bytes to a shadow key
918    /// (`<key>.__s4ver__/<version-id>`) so older versions survive newer
919    /// PUTs to the same logical key.
920    #[must_use]
921    pub fn with_versioning(mut self, mgr: Arc<crate::versioning::VersioningManager>) -> Self {
922        self.versioning = Some(mgr);
923        self
924    }
925
926    /// v0.4 #21 (kept for back-compat): attach a single SSE-S4 key.
927    /// Internally wraps it in a 1-slot keyring with id=1 active, so
928    /// new objects ride the v0.5 S4E2 frame while previously-written
929    /// S4E1 bytes (this same key) still decrypt via the keyring's S4E1
930    /// fallback path. Operators wanting true rotation should call
931    /// [`Self::with_sse_keyring`] instead.
932    #[must_use]
933    pub fn with_sse_key(mut self, key: crate::sse::SharedSseKey) -> Self {
934        let keyring = crate::sse::SseKeyring::new(1, key);
935        self.sse_keyring = Some(std::sync::Arc::new(keyring));
936        self
937    }
938
939    /// v0.5 #29: attach a multi-key SSE-S4 keyring. PUT encrypts under
940    /// the active key (S4E2 frame stamped with that key's id); GET
941    /// dispatches on the body's magic — S4E1 falls back to trying every
942    /// key in the ring (active first) so v0.4 objects survive a
943    /// migration; S4E2 looks up the explicit key_id from the header.
944    #[must_use]
945    pub fn with_sse_keyring(mut self, keyring: crate::sse::SharedSseKeyring) -> Self {
946        self.sse_keyring = Some(keyring);
947        self
948    }
949
950    /// v0.8 #52: opt the SSE-S4 PUT path into the chunked S4E5 frame
951    /// (so the matching GET can stream-decrypt chunk-by-chunk
952    /// instead of buffering the entire body before tag verify).
953    /// `bytes` is the plaintext slice size — typically 1 MiB; 0
954    /// disables the path and reverts to the legacy S4E2 buffered
955    /// frame.
956    ///
957    /// SSE-C (S4E3) and SSE-KMS (S4E4) are intentionally untouched:
958    /// the chunked envelopes for those flows are a follow-up issue
959    /// (the customer-key wire surface needs separate version
960    /// negotiation).
961    ///
962    /// Has no effect when `with_sse_keyring` / `with_sse_key` is
963    /// not also set — the chunked path runs only on the SSE-S4
964    /// branch of `put_object`.
965    #[must_use]
966    pub fn with_sse_chunk_size(mut self, bytes: usize) -> Self {
967        self.sse_chunk_size = bytes;
968        self
969    }
970
971    /// v0.4 #20: attach an S3-style access-log emitter. Each completed
972    /// PUT / GET / DELETE / List handler emits one entry into the
973    /// emitter's buffer; a background flusher (started separately, see
974    /// [`crate::access_log::AccessLog::spawn_flusher`]) writes hourly
975    /// rotated `.log` files into the configured directory.
976    #[must_use]
977    pub fn with_access_log(mut self, log: crate::access_log::SharedAccessLog) -> Self {
978        self.access_log = Some(log);
979        self
980    }
981
982    /// Capture the per-request access-log preamble before the request is
983    /// consumed by the backend call. Returns `None` if no access logger
984    /// is configured (cheap early-out so the handler doesn't pay the
985    /// header-clone cost when access logging is off).
986    fn access_log_preamble<I>(&self, req: &S3Request<I>) -> Option<AccessLogPreamble> {
987        self.access_log.as_ref()?;
988        Some(AccessLogPreamble {
989            remote_ip: req
990                .headers
991                .get("x-forwarded-for")
992                .and_then(|v| v.to_str().ok())
993                .and_then(|raw| raw.split(',').next())
994                .map(|s| s.trim().to_owned()),
995            requester: Self::principal_of(req).map(str::to_owned),
996            request_uri: format!("{} {}", req.method, req.uri.path()),
997            user_agent: req
998                .headers
999                .get("user-agent")
1000                .and_then(|v| v.to_str().ok())
1001                .map(str::to_owned),
1002        })
1003    }
1004
1005    /// Internal — called by handlers at end-of-request with a captured
1006    /// preamble. Best-effort: swallows the await fast (clones Arc +
1007    /// pushes), no error propagation back to the request path.
1008    #[allow(clippy::too_many_arguments)]
1009    async fn record_access(
1010        &self,
1011        preamble: Option<AccessLogPreamble>,
1012        operation: &'static str,
1013        bucket: &str,
1014        key: Option<&str>,
1015        http_status: u16,
1016        bytes_sent: u64,
1017        object_size: u64,
1018        total_time_ms: u64,
1019        error_code: Option<&str>,
1020    ) {
1021        let (Some(log), Some(p)) = (self.access_log.as_ref(), preamble) else {
1022            return;
1023        };
1024        log.record(crate::access_log::AccessLogEntry {
1025            time: std::time::SystemTime::now(),
1026            bucket: bucket.to_owned(),
1027            remote_ip: p.remote_ip,
1028            requester: p.requester,
1029            operation,
1030            key: key.map(str::to_owned),
1031            request_uri: p.request_uri,
1032            http_status,
1033            error_code: error_code.map(str::to_owned),
1034            bytes_sent,
1035            object_size,
1036            total_time_ms,
1037            user_agent: p.user_agent,
1038        })
1039        .await;
1040    }
1041
1042    /// v0.4 #19: attach a per-(principal, bucket) token-bucket rate limiter.
1043    /// When set, every PUT / GET / DELETE / List / Copy / multipart op is
1044    /// throttle-checked before the policy gate; throttled requests return
1045    /// `S3ErrorCode::SlowDown` (HTTP 503) and bump
1046    /// `s4_rate_limit_throttled_total{principal,bucket}`.
1047    #[must_use]
1048    pub fn with_rate_limits(mut self, rl: crate::rate_limit::SharedRateLimits) -> Self {
1049        self.rate_limits = Some(rl);
1050        self
1051    }
1052
1053    /// Helper used by request handlers to apply the rate limit. Returns
1054    /// `Ok(())` when allowed (or no rate limiter is configured), or a
1055    /// `SlowDown` S3Error otherwise.
1056    fn enforce_rate_limit<I>(&self, req: &S3Request<I>, bucket: &str) -> S3Result<()> {
1057        let Some(rl) = self.rate_limits.as_ref() else {
1058            return Ok(());
1059        };
1060        let principal_id = Self::principal_of(req);
1061        if !rl.check(principal_id, bucket) {
1062            crate::metrics::record_rate_limit_throttle(principal_id.unwrap_or("-"), bucket);
1063            return Err(S3Error::with_message(
1064                S3ErrorCode::SlowDown,
1065                format!("rate-limited: bucket={bucket}"),
1066            ));
1067        }
1068        Ok(())
1069    }
1070
1071    /// Tell the policy evaluator that the listener is reached over TLS
1072    /// (or ACME). When `true`, the `aws:SecureTransport` Condition key
1073    /// resolves to `true`. Defaults to `false`.
1074    #[must_use]
1075    pub fn with_secure_transport(mut self, on: bool) -> Self {
1076        self.secure_transport = on;
1077        self
1078    }
1079
1080    #[must_use]
1081    pub fn with_max_body_bytes(mut self, n: usize) -> Self {
1082        self.max_body_bytes = n;
1083        self
1084    }
1085
1086    /// Attach an optional bucket policy (v0.2 #7). When `Some(...)`, every
1087    /// PUT / GET / DELETE / List handler runs `policy.evaluate(...)` before
1088    /// delegating to the backend; failures return `S3ErrorCode::AccessDenied`.
1089    /// When `None` (the default), no policy enforcement happens.
1090    #[must_use]
1091    pub fn with_policy(mut self, policy: crate::policy::SharedPolicy) -> Self {
1092        self.policy = Some(policy);
1093        self
1094    }
1095
1096    /// Pull the SigV4 access key id off the request's credentials, if any.
1097    /// Used as the `principal_id` for policy evaluation.
1098    fn principal_of<I>(req: &S3Request<I>) -> Option<&str> {
1099        req.credentials.as_ref().map(|c| c.access_key.as_str())
1100    }
1101
1102    /// v0.3 #13: build the per-request policy context from the incoming
1103    /// `S3Request`. Pulls `aws:UserAgent` from the User-Agent header,
1104    /// `aws:SourceIp` from the standard `X-Forwarded-For` header (most
1105    /// production deployments are behind an LB / reverse proxy that sets
1106    /// this), `aws:CurrentTime` from the system clock, and
1107    /// `aws:SecureTransport` from the per-listener TLS flag.
1108    fn request_context<I>(&self, req: &S3Request<I>) -> crate::policy::RequestContext {
1109        let user_agent = req
1110            .headers
1111            .get("user-agent")
1112            .and_then(|v| v.to_str().ok())
1113            .map(str::to_owned);
1114        // X-Forwarded-For is `client, proxy1, proxy2`; the leftmost entry
1115        // is the original client. Trim and parse leniently.
1116        let source_ip = req
1117            .headers
1118            .get("x-forwarded-for")
1119            .and_then(|v| v.to_str().ok())
1120            .and_then(|raw| raw.split(',').next())
1121            .and_then(|s| s.trim().parse().ok());
1122        crate::policy::RequestContext {
1123            source_ip,
1124            user_agent,
1125            request_time: Some(std::time::SystemTime::now()),
1126            secure_transport: self.secure_transport,
1127            existing_object_tags: None,
1128            request_object_tags: None,
1129            extra: Default::default(),
1130        }
1131    }
1132
1133    /// Helper used by request handlers to enforce the optional policy.
1134    /// Returns `Ok(())` when allowed (or no policy is configured), or an
1135    /// `AccessDenied` S3Error otherwise. Bumps the policy denial Prometheus
1136    /// counter on deny.
1137    fn enforce_policy<I>(
1138        &self,
1139        req: &S3Request<I>,
1140        action: &'static str,
1141        bucket: &str,
1142        key: Option<&str>,
1143    ) -> S3Result<()> {
1144        self.enforce_policy_with_extra(req, action, bucket, key, None, None)
1145    }
1146
1147    /// v0.6 #39: variant of [`Self::enforce_policy`] that lets the
1148    /// caller plumb tag context (existing-on-object + on-request) into
1149    /// the policy evaluator. Both arguments default to `None`, in
1150    /// which case the resulting `RequestContext` is identical to
1151    /// [`Self::enforce_policy`]'s — so for handlers that don't deal
1152    /// with tags this is a transparent no-op.
1153    fn enforce_policy_with_extra<I>(
1154        &self,
1155        req: &S3Request<I>,
1156        action: &'static str,
1157        bucket: &str,
1158        key: Option<&str>,
1159        request_tags: Option<&crate::tagging::TagSet>,
1160        existing_tags: Option<&crate::tagging::TagSet>,
1161    ) -> S3Result<()> {
1162        let Some(policy) = self.policy.as_ref() else {
1163            return Ok(());
1164        };
1165        let principal_id = Self::principal_of(req);
1166        let mut ctx = self.request_context(req);
1167        if let Some(t) = request_tags {
1168            ctx.request_object_tags = Some(t.clone());
1169        }
1170        if let Some(t) = existing_tags {
1171            ctx.existing_object_tags = Some(t.clone());
1172        }
1173        let decision = policy.evaluate_with(action, bucket, key, principal_id, &ctx);
1174        if decision.allow {
1175            Ok(())
1176        } else {
1177            crate::metrics::record_policy_denial(action, bucket);
1178            tracing::info!(
1179                action,
1180                bucket,
1181                key = ?key,
1182                principal = ?principal_id,
1183                source_ip = ?ctx.source_ip,
1184                user_agent = ?ctx.user_agent,
1185                secure_transport = ctx.secure_transport,
1186                matched_sid = ?decision.matched_sid,
1187                effect = ?decision.matched_effect,
1188                "S4 policy denied request"
1189            );
1190            Err(S3Error::with_message(
1191                S3ErrorCode::AccessDenied,
1192                format!("denied by S4 policy: {action} on bucket={bucket}"),
1193            ))
1194        }
1195    }
1196
1197    /// テスト用: backend を取り戻す (test helper、production では使わない).
1198    /// v0.6 #40 で `backend` が `Arc<B>` 化したので `Arc::try_unwrap` で
1199    /// 1-clone の場合のみ返す。共有されている (= replication dispatcher が
1200    /// 同じ Arc を持っていて未完了) 場合は `Err` を返さず panic させる
1201    /// (test 用途専用 helper の caller 契約を維持)。
1202    pub fn into_backend(self) -> B {
1203        Arc::try_unwrap(self.backend).unwrap_or_else(|_| {
1204            panic!("into_backend: backend Arc still shared (replication dispatcher in flight?)")
1205        })
1206    }
1207
    /// Sidecar fast path for Range GET: issue a partial (Range) GET to
    /// the backend for **only the frames the plan needs**, frame-parse +
    /// decompress them, and slice out the client's requested window.
    /// This is the bandwidth-saving variant of Range handling — the
    /// fallback path reads and decompresses the whole object.
    ///
    /// `plan` carries the backend byte range plus the slice offsets into
    /// the decompressed concatenation; `client_start` /
    /// `client_end_exclusive` / `total_original` describe the client's
    /// view for the `Content-Range` header; `get_start` anchors the
    /// latency metric.
    async fn partial_range_get(
        &self,
        req: &S3Request<GetObjectInput>,
        plan: s4_codec::index::RangePlan,
        client_start: u64,
        client_end_exclusive: u64,
        total_original: u64,
        get_start: Instant,
    ) -> S3Result<S3Response<GetObjectOutput>> {
        // Partial GET against the backend for just the planned byte
        // range (inclusive `last`, hence the -1).
        let backend_range = s3s::dto::Range::Int {
            first: plan.byte_start,
            last: Some(plan.byte_end_exclusive - 1),
        };
        let backend_input = GetObjectInput {
            bucket: req.input.bucket.clone(),
            key: req.input.key.clone(),
            range: Some(backend_range),
            ..Default::default()
        };
        let backend_req = S3Request {
            input: backend_input,
            method: req.method.clone(),
            uri: req.uri.clone(),
            headers: req.headers.clone(),
            extensions: http::Extensions::new(),
            credentials: req.credentials.clone(),
            region: req.region.clone(),
            service: req.service.clone(),
            trailing_headers: None,
        };
        let mut backend_resp = self.backend.get_object(backend_req).await?;
        let blob = backend_resp.output.body.take().ok_or_else(|| {
            S3Error::with_message(
                S3ErrorCode::InternalError,
                "backend partial GET returned empty body",
            )
        })?;
        let bytes = collect_blob(blob, self.max_body_bytes)
            .await
            .map_err(internal("collect partial body"))?;

        // Frame parse + per-frame decompress, concatenated in order.
        // NOTE(review): FrameIter starts parsing at offset 0 of the
        // fetched bytes, i.e. this assumes `plan.byte_start` is
        // frame-aligned — established by the sidecar index planner;
        // confirm there.
        let mut combined = BytesMut::new();
        for frame in FrameIter::new(bytes) {
            let (header, payload) = frame.map_err(|e| {
                S3Error::with_message(
                    S3ErrorCode::InternalError,
                    format!("partial-range frame parse: {e}"),
                )
            })?;
            let chunk_manifest = ChunkManifest {
                codec: header.codec,
                original_size: header.original_size,
                compressed_size: header.compressed_size,
                crc32c: header.crc32c,
            };
            let decompressed = self
                .registry
                .decompress(payload, &chunk_manifest)
                .await
                .map_err(internal("partial-range decompress"))?;
            combined.extend_from_slice(&decompressed);
        }
        let combined = combined.freeze();
        let sliced = combined
            .slice(plan.slice_start_in_combined as usize..plan.slice_end_in_combined as usize);

        // Assemble the 206 response. The backend's checksums / ETag
        // describe the stored (compressed) bytes, not the decompressed
        // slice we return, so they are cleared.
        let returned_size = sliced.len() as u64;
        backend_resp.output.content_length = Some(returned_size as i64);
        backend_resp.output.content_range = Some(format!(
            "bytes {client_start}-{}/{total_original}",
            client_end_exclusive - 1
        ));
        backend_resp.output.checksum_crc32 = None;
        backend_resp.output.checksum_crc32c = None;
        backend_resp.output.checksum_crc64nvme = None;
        backend_resp.output.checksum_sha1 = None;
        backend_resp.output.checksum_sha256 = None;
        backend_resp.output.e_tag = None;
        backend_resp.output.body = Some(bytes_to_blob(sliced));
        backend_resp.status = Some(http::StatusCode::PARTIAL_CONTENT);

        // Metrics + structured log: bytes_in is what we pulled from the
        // backend, bytes_out is what the client receives.
        let elapsed = get_start.elapsed();
        crate::metrics::record_get(
            "partial",
            plan.byte_end_exclusive - plan.byte_start,
            returned_size,
            elapsed.as_secs_f64(),
            true,
        );
        info!(
            op = "get_object",
            bucket = %req.input.bucket,
            key = %req.input.key,
            bytes_in = plan.byte_end_exclusive - plan.byte_start,
            bytes_out = returned_size,
            total_object_size = total_original,
            range = true,
            path = "sidecar-partial",
            latency_ms = elapsed.as_millis() as u64,
            "S4 partial Range GET via sidecar index"
        );
        Ok(backend_resp)
    }
1316
1317    /// `<key>.s4index` sidecar object を backend に書く。失敗しても本体 PUT は
1318    /// 成功扱いにしたいので、err は warn ログのみ (Range GET の partial path が
1319    /// 使えなくなるが、full read fallback で意味的には正しい結果を返す)。
1320    async fn write_sidecar(&self, bucket: &str, key: &str, index: &FrameIndex) {
1321        let bytes = encode_index(index);
1322        let len = bytes.len() as i64;
1323        let sidecar = sidecar_key(key);
1324        // v0.7 #49: synthetic re-entry URI must be percent-encoded; if
1325        // the (already legally-arbitrary) S3 key produces something we
1326        // cannot encode at all, drop the sidecar PUT (the GET path
1327        // falls back to a full read on a missing sidecar) instead of
1328        // panicking on `parse().unwrap()`.
1329        let uri = match safe_object_uri(bucket, &sidecar) {
1330            Ok(u) => u,
1331            Err(e) => {
1332                tracing::warn!(
1333                    bucket,
1334                    key,
1335                    "S4 write_sidecar skipped (key not URI-encodable): {e}"
1336                );
1337                return;
1338            }
1339        };
1340        let put_input = PutObjectInput {
1341            bucket: bucket.into(),
1342            key: sidecar,
1343            body: Some(bytes_to_blob(bytes)),
1344            content_length: Some(len),
1345            content_type: Some("application/x-s4-index".into()),
1346            ..Default::default()
1347        };
1348        let put_req = S3Request {
1349            input: put_input,
1350            method: http::Method::PUT,
1351            uri,
1352            headers: http::HeaderMap::new(),
1353            extensions: http::Extensions::new(),
1354            credentials: None,
1355            region: None,
1356            service: None,
1357            trailing_headers: None,
1358        };
1359        if let Err(e) = self.backend.put_object(put_req).await {
1360            tracing::warn!(
1361                bucket,
1362                key,
1363                "S4 write_sidecar failed (Range GET will fall back to full read): {e}"
1364            );
1365        }
1366    }
1367
1368    /// v0.8.4 #73 H-2: confirm that the sidecar we just decoded still
1369    /// describes the current backend object before we trust its frame
1370    /// offsets for a partial Range GET. The sidecar carries the source
1371    /// `etag` and `compressed_size` that were observed at PUT time; we
1372    /// HEAD the backend object and compare.
1373    ///
1374    /// Decision matrix:
1375    /// - sidecar `source_etag = None` (legacy v1 / build_index_from_body
1376    ///   that wasn't stamped) → return `true` (best-effort, preserves
1377    ///   pre-v0.8.4 behaviour for existing on-disk sidecars).
1378    /// - HEAD fails → return `false` (we can't tell either way; full GET
1379    ///   path will surface the real backend error to the client).
1380    /// - HEAD ETag matches → `true`.
1381    /// - HEAD ETag differs OR HEAD size differs from
1382    ///   `source_compressed_size` → `false` (sidecar stale or attacker-
1383    ///   written; fall back to full GET).
1384    async fn sidecar_version_binding_ok(
1385        &self,
1386        bucket: &str,
1387        key: &str,
1388        index: &FrameIndex,
1389    ) -> bool {
1390        let Some(ref expected_etag) = index.source_etag else {
1391            // Legacy sidecar without the v0.8.4 #73 H-2 binding —
1392            // back-compat: trust it (the partial fetch is the same
1393            // best-effort path that v0.8.3 and earlier shipped).
1394            return true;
1395        };
1396        let head_input = HeadObjectInput {
1397            bucket: bucket.into(),
1398            key: key.into(),
1399            ..Default::default()
1400        };
1401        let uri = match safe_object_uri(bucket, key) {
1402            Ok(u) => u,
1403            Err(_) => return false,
1404        };
1405        let head_req = S3Request {
1406            input: head_input,
1407            method: http::Method::HEAD,
1408            uri,
1409            headers: http::HeaderMap::new(),
1410            extensions: http::Extensions::new(),
1411            credentials: None,
1412            region: None,
1413            service: None,
1414            trailing_headers: None,
1415        };
1416        let head = match self.backend.head_object(head_req).await {
1417            Ok(r) => r.output,
1418            Err(e) => {
1419                tracing::debug!(
1420                    bucket,
1421                    key,
1422                    "S4 sidecar version-binding HEAD failed, falling back to full GET: {e}"
1423                );
1424                return false;
1425            }
1426        };
1427        // ETag is a strong-vs-weak enum; we compare on the unwrapped string
1428        // form (matches what the PUT path stamped — see below).
1429        let live_etag = head.e_tag.as_ref().map(|t| t.value());
1430        if live_etag != Some(expected_etag.as_str()) {
1431            tracing::debug!(
1432                bucket,
1433                key,
1434                "sidecar stale (ETag mismatch), falling back to full GET (sidecar={:?}, live={:?})",
1435                expected_etag,
1436                live_etag,
1437            );
1438            return false;
1439        }
1440        if let Some(expected_size) = index.source_compressed_size
1441            && let Some(live_size) = head.content_length
1442            && live_size as u64 != expected_size
1443        {
1444            tracing::debug!(
1445                bucket,
1446                key,
1447                "sidecar stale (size mismatch), falling back to full GET (sidecar={}, live={})",
1448                expected_size,
1449                live_size,
1450            );
1451            return false;
1452        }
1453        true
1454    }
1455
1456    /// `<key>.s4index` sidecar を backend から読み出す。なければ None。
1457    async fn read_sidecar(&self, bucket: &str, key: &str) -> Option<FrameIndex> {
1458        let sidecar = sidecar_key(key);
1459        // v0.7 #49: same encode-or-bail treatment as write_sidecar.
1460        let uri = safe_object_uri(bucket, &sidecar).ok()?;
1461        let get_input = GetObjectInput {
1462            bucket: bucket.into(),
1463            key: sidecar,
1464            ..Default::default()
1465        };
1466        let get_req = S3Request {
1467            input: get_input,
1468            method: http::Method::GET,
1469            uri,
1470            headers: http::HeaderMap::new(),
1471            extensions: http::Extensions::new(),
1472            credentials: None,
1473            region: None,
1474            service: None,
1475            trailing_headers: None,
1476        };
1477        let resp = self.backend.get_object(get_req).await.ok()?;
1478        let blob = resp.output.body?;
1479        let bytes = collect_blob(blob, 64 * 1024 * 1024).await.ok()?;
1480        decode_index(bytes).ok()
1481    }
1482
1483    /// Multipart object (frame 列) を解凍 → 元 bytes を再構築。
1484    ///
1485    /// **per-frame codec dispatch**: 各 frame header に codec_id が入っているので、
1486    /// frame ごとに registry が違う codec を呼ぶことができる。同一 object 内で
1487    /// 異なる codec が混在していても透過的に解凍可能 (parquet 風 mixed columns 等)。
1488    async fn decompress_multipart(&self, bytes: bytes::Bytes) -> S3Result<bytes::Bytes> {
1489        let mut out = BytesMut::new();
1490        for frame in FrameIter::new(bytes) {
1491            let (header, payload) = frame.map_err(|e| {
1492                S3Error::with_message(
1493                    S3ErrorCode::InternalError,
1494                    format!("multipart frame parse: {e}"),
1495                )
1496            })?;
1497            let chunk_manifest = ChunkManifest {
1498                codec: header.codec,
1499                original_size: header.original_size,
1500                compressed_size: header.compressed_size,
1501                crc32c: header.crc32c,
1502            };
1503            let decompressed = self
1504                .registry
1505                .decompress(payload, &chunk_manifest)
1506                .await
1507                .map_err(internal("multipart frame decompress"))?;
1508            out.extend_from_slice(&decompressed);
1509        }
1510        Ok(out.freeze())
1511    }
1512}
1513
1514/// Parse a CopySourceRange header value (`bytes=N-M`, `bytes=N-`, `bytes=-N`)
1515/// into the s3s::dto::Range used by the GetObject path. The S3 spec only
1516/// allows `bytes=N-M` for upload_part_copy (no suffix or open-ended), so
1517/// reject the other variants for parity with AWS.
1518fn parse_copy_source_range(s: &str) -> Result<s3s::dto::Range, String> {
1519    let rest = s
1520        .strip_prefix("bytes=")
1521        .ok_or_else(|| format!("CopySourceRange must start with 'bytes=', got {s:?}"))?;
1522    let (a, b) = rest
1523        .split_once('-')
1524        .ok_or_else(|| format!("CopySourceRange must be 'bytes=N-M', got {s:?}"))?;
1525    let first: u64 = a
1526        .parse()
1527        .map_err(|_| format!("CopySourceRange first byte not a number: {a:?}"))?;
1528    let last: u64 = b
1529        .parse()
1530        .map_err(|_| format!("CopySourceRange last byte not a number: {b:?}"))?;
1531    if last < first {
1532        return Err(format!("CopySourceRange last < first: {s:?}"));
1533    }
1534    Ok(s3s::dto::Range::Int {
1535        first,
1536        last: Some(last),
1537    })
1538}
1539
/// v0.5 #34: synthesize the backend storage key for a given
/// (logical key, version-id) pair on an Enabled-versioning bucket.
///
/// Uses the `__s4ver__/` infix because:
/// - it's not a substring of `.s4index` / `.s4ver` natural keys (no
///   false-positive listing filter collisions)
/// - the directory-style separator keeps S3 console "browse by prefix" UX
///   intact (versions roll up under one virtual folder per object)
/// - it stays human-readable in debug logs / `aws s3 ls`
///
/// `list_objects` / `list_objects_v2` / `list_object_versions` MUST filter
/// keys containing `.__s4ver__/` from results so customers don't see
/// internal shadow objects.
pub fn versioned_shadow_key(key: &str, version_id: &str) -> String {
    const INFIX: &str = ".__s4ver__/";
    let mut shadow = String::with_capacity(key.len() + INFIX.len() + version_id.len());
    shadow.push_str(key);
    shadow.push_str(INFIX);
    shadow.push_str(version_id);
    shadow
}
1556
/// Test for the marker substring used by [`versioned_shadow_key`]. Cheap str
/// scan; both the list_objects filter and the GET passthrough check use this.
fn is_versioning_shadow_key(key: &str) -> bool {
    const MARKER: &str = ".__s4ver__/";
    key.contains(MARKER)
}
1562
/// v0.6 #42: wall-clock seconds since the UNIX epoch — fed to
/// `mfa::check_mfa` so the TOTP verifier can match the client's
/// authenticator app's view of "now". Falls back to `0` on the
/// (impossible-in-practice) clock-before-1970 path so the verifier
/// rejects rather than panicking.
fn current_unix_secs() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
1574
1575/// v0.6 #42: translate an `MfaError` into the matching S3 wire error.
1576///
1577/// - `Missing` / `SerialMismatch` / `InvalidCode` → `403 AccessDenied`
1578///   (S3 spec for MFA Delete: every gating failure surfaces as
1579///   `AccessDenied`, not a separate `MFA*` code).
1580/// - `Malformed` → `400 InvalidRequest` (the request itself is
1581///   syntactically broken, not a permission issue).
1582fn mfa_error_to_s3(e: crate::mfa::MfaError) -> S3Error {
1583    match e {
1584        crate::mfa::MfaError::Missing => S3Error::with_message(
1585            S3ErrorCode::AccessDenied,
1586            "MFA token required for this operation",
1587        ),
1588        crate::mfa::MfaError::Malformed => {
1589            S3Error::with_message(S3ErrorCode::InvalidRequest, "malformed x-amz-mfa header")
1590        }
1591        crate::mfa::MfaError::SerialMismatch => S3Error::with_message(
1592            S3ErrorCode::AccessDenied,
1593            "MFA serial does not match configured device",
1594        ),
1595        crate::mfa::MfaError::InvalidCode => {
1596            S3Error::with_message(S3ErrorCode::AccessDenied, "invalid MFA code")
1597        }
1598    }
1599}
1600
1601fn is_multipart_object(metadata: &Option<Metadata>) -> bool {
1602    metadata
1603        .as_ref()
1604        .and_then(|m| m.get(META_MULTIPART))
1605        .map(|v| v == "true")
1606        .unwrap_or(false)
1607}
1608
/// Metadata key: name of the codec that compressed the object.
const META_CODEC: &str = "s4-codec";
/// Metadata key: byte size of the original (pre-compression) data.
const META_ORIGINAL_SIZE: &str = "s4-original-size";
/// Metadata key: byte size of the compressed data as stored in the backend.
const META_COMPRESSED_SIZE: &str = "s4-compressed-size";
/// Metadata key: CRC32C checksum recorded in the chunk manifest.
const META_CRC32C: &str = "s4-crc32c";
/// Marks an object written via multipart upload using the per-part frame
/// format. GET inspects this flag to engage the frame parser.
const META_MULTIPART: &str = "s4-multipart";
/// v0.2 #4: marks a single-PUT object written in the S4F2 framed format.
/// Legacy v0.1 single-PUTs are raw compressed bytes (flag absent). GET
/// inspects this flag to route through the framed path (= the same
/// FrameIter parse as multipart).
const META_FRAMED: &str = "s4-framed";
1620
1621fn is_framed_v2_object(metadata: &Option<Metadata>) -> bool {
1622    metadata
1623        .as_ref()
1624        .and_then(|m| m.get(META_FRAMED))
1625        .map(|v| v == "true")
1626        .unwrap_or(false)
1627}
1628
1629/// v0.4 #21: detect SSE-S4 by the metadata flag we set on PUT.
1630fn is_sse_encrypted(metadata: &Option<Metadata>) -> bool {
1631    metadata
1632        .as_ref()
1633        .and_then(|m| m.get("s4-encrypted"))
1634        .map(|v| v == "aes-256-gcm")
1635        .unwrap_or(false)
1636}
1637
1638/// v0.5 #27: pull the three SSE-C headers off an input struct. The S3
1639/// contract is "all three or none" — partial sets are a 400.
1640///
1641/// Returns `Ok(None)` when no SSE-C headers were sent (server-managed or
1642/// no encryption), `Ok(Some(material))` on validated client key, and
1643/// `Err` for malformed or partial inputs.
1644fn extract_sse_c_material(
1645    algorithm: &Option<String>,
1646    key: &Option<String>,
1647    md5: &Option<String>,
1648) -> S3Result<Option<crate::sse::CustomerKeyMaterial>> {
1649    match (algorithm, key, md5) {
1650        (None, None, None) => Ok(None),
1651        (Some(a), Some(k), Some(m)) => crate::sse::parse_customer_key_headers(a, k, m)
1652            .map(Some)
1653            .map_err(sse_c_error_to_s3),
1654        _ => Err(S3Error::with_message(
1655            S3ErrorCode::InvalidRequest,
1656            "SSE-C requires all three of: x-amz-server-side-encryption-customer-{algorithm,key,key-MD5}",
1657        )),
1658    }
1659}
1660
1661/// v0.5 #28: detect SSE-KMS request — `x-amz-server-side-encryption: aws:kms`.
1662/// Returns the key-id to wrap under, falling back to the gateway default.
1663fn extract_kms_key_id(
1664    sse: &Option<ServerSideEncryption>,
1665    sse_kms_key_id: &Option<String>,
1666    gateway_default: Option<&str>,
1667) -> Option<String> {
1668    let asks_for_kms = sse
1669        .as_ref()
1670        .map(|s| s.as_str() == ServerSideEncryption::AWS_KMS)
1671        .unwrap_or(false);
1672    if !asks_for_kms {
1673        return None;
1674    }
1675    sse_kms_key_id
1676        .clone()
1677        .or_else(|| gateway_default.map(str::to_owned))
1678}
1679
1680/// v0.5 #28: map kms module errors to AWS-shaped S3 error codes.
1681/// `KeyNotFound` is operator misconfig (400); `BackendUnavailable` is a
1682/// transient KMS outage (503). Other variants are 500 InternalError.
1683fn kms_error_to_s3(e: crate::kms::KmsError) -> S3Error {
1684    use crate::kms::KmsError as K;
1685    match e {
1686        K::KeyNotFound { key_id } => S3Error::with_message(
1687            S3ErrorCode::InvalidArgument,
1688            format!("KMS key not found: {key_id}"),
1689        ),
1690        K::BackendUnavailable { message } => S3Error::with_message(
1691            S3ErrorCode::ServiceUnavailable,
1692            format!("KMS backend unavailable: {message}"),
1693        ),
1694        other => S3Error::with_message(S3ErrorCode::InternalError, format!("KMS error: {other}")),
1695    }
1696}
1697
1698/// v0.5 #27: map sse module errors to AWS-shaped S3 error codes.
1699/// `WrongCustomerKey` → 403 AccessDenied (matches AWS behaviour);
1700/// `InvalidCustomerKey` / algorithm / required / unexpected → 400.
1701fn sse_c_error_to_s3(e: crate::sse::SseError) -> S3Error {
1702    use crate::sse::SseError as E;
1703    match e {
1704        E::WrongCustomerKey => S3Error::with_message(
1705            S3ErrorCode::AccessDenied,
1706            "SSE-C key does not match the key used at PUT time",
1707        ),
1708        E::InvalidCustomerKey { reason } => {
1709            S3Error::with_message(S3ErrorCode::InvalidArgument, format!("SSE-C: {reason}"))
1710        }
1711        E::CustomerKeyAlgorithmUnsupported { algo } => S3Error::with_message(
1712            S3ErrorCode::InvalidArgument,
1713            format!("SSE-C unsupported algorithm: {algo:?} (only AES256 is allowed)"),
1714        ),
1715        E::CustomerKeyRequired => S3Error::with_message(
1716            S3ErrorCode::InvalidRequest,
1717            "object is SSE-C encrypted; supply x-amz-server-side-encryption-customer-* headers",
1718        ),
1719        E::CustomerKeyUnexpected => S3Error::with_message(
1720            S3ErrorCode::InvalidRequest,
1721            "object is not SSE-C encrypted; do not send x-amz-server-side-encryption-customer-* headers",
1722        ),
1723        other => S3Error::with_message(S3ErrorCode::InternalError, format!("SSE error: {other}")),
1724    }
1725}
1726
1727fn extract_manifest(metadata: &Option<Metadata>) -> Option<ChunkManifest> {
1728    let m = metadata.as_ref()?;
1729    let codec = m
1730        .get(META_CODEC)
1731        .and_then(|s| s.parse::<CodecKind>().ok())?;
1732    let original_size = m.get(META_ORIGINAL_SIZE)?.parse().ok()?;
1733    let compressed_size = m.get(META_COMPRESSED_SIZE)?.parse().ok()?;
1734    let crc32c = m.get(META_CRC32C)?.parse().ok()?;
1735    Some(ChunkManifest {
1736        codec,
1737        original_size,
1738        compressed_size,
1739        crc32c,
1740    })
1741}
1742
1743fn write_manifest(metadata: &mut Option<Metadata>, manifest: &ChunkManifest) {
1744    let meta = metadata.get_or_insert_with(Default::default);
1745    meta.insert(META_CODEC.into(), manifest.codec.as_str().into());
1746    meta.insert(
1747        META_ORIGINAL_SIZE.into(),
1748        manifest.original_size.to_string(),
1749    );
1750    meta.insert(
1751        META_COMPRESSED_SIZE.into(),
1752        manifest.compressed_size.to_string(),
1753    );
1754    meta.insert(META_CRC32C.into(), manifest.crc32c.to_string());
1755}
1756
1757fn internal<E: std::fmt::Display>(prefix: &'static str) -> impl FnOnce(E) -> S3Error {
1758    move |e| S3Error::with_message(S3ErrorCode::InternalError, format!("{prefix}: {e}"))
1759}
1760
1761/// v0.6 #41: map a `select::SelectError` to the S3 error surface. AWS
1762/// uses a domain-specific `InvalidSqlExpression` code for parse / unsupported
1763/// errors, but s3s 0.13 doesn't expose that as a typed variant — we
1764/// fall back to the well-known `InvalidRequest` 400 with a descriptive
1765/// message that includes the original error context.
1766fn select_error_to_s3(e: crate::select::SelectError, fmt: &str) -> S3Error {
1767    use crate::select::SelectError;
1768    match e {
1769        SelectError::Parse(msg) => S3Error::with_message(
1770            S3ErrorCode::InvalidRequest,
1771            format!("SQL parse error: {msg}"),
1772        ),
1773        SelectError::UnsupportedFeature(msg) => S3Error::with_message(
1774            S3ErrorCode::InvalidRequest,
1775            format!("unsupported SQL feature: {msg}"),
1776        ),
1777        SelectError::RowEval(msg) => S3Error::with_message(
1778            S3ErrorCode::InvalidRequest,
1779            format!("SQL row evaluation error: {msg}"),
1780        ),
1781        SelectError::InputFormat(msg) => S3Error::with_message(
1782            S3ErrorCode::InvalidRequest,
1783            format!("{fmt} input format error: {msg}"),
1784        ),
1785    }
1786}
1787
1788/// v0.5 #30: parse the `x-amz-bypass-governance-retention` header into a
1789/// boolean flag. AWS S3 accepts `true` (case-insensitive); any other value
1790/// (including missing) is treated as `false`.
1791fn parse_bypass_governance_header(headers: &http::HeaderMap) -> bool {
1792    headers
1793        .get("x-amz-bypass-governance-retention")
1794        .and_then(|v| v.to_str().ok())
1795        .map(|s| s.eq_ignore_ascii_case("true"))
1796        .unwrap_or(false)
1797}
1798
1799/// Convert s3s `Timestamp` into a `chrono::DateTime<Utc>` by formatting it
1800/// as an RFC3339 string and re-parsing through `chrono`. The string format
1801/// avoids pulling the `time` crate (transitive dep of s3s, not declared by
1802/// s4-server) into our direct deps. Returns `None` if the format/parse fails
1803/// or the value is outside `chrono`'s supported range.
1804fn timestamp_to_chrono_utc(ts: &Timestamp) -> Option<chrono::DateTime<chrono::Utc>> {
1805    let mut buf = Vec::new();
1806    ts.format(s3s::dto::TimestampFormat::DateTime, &mut buf)
1807        .ok()?;
1808    let s = std::str::from_utf8(&buf).ok()?;
1809    chrono::DateTime::parse_from_rfc3339(s)
1810        .ok()
1811        .map(|dt| dt.with_timezone(&chrono::Utc))
1812}
1813
1814/// Inverse of [`timestamp_to_chrono_utc`] — emit RFC3339 (the s3s
1815/// `DateTime` wire format) and re-parse via `Timestamp::parse`.
1816fn chrono_utc_to_timestamp(dt: chrono::DateTime<chrono::Utc>) -> Timestamp {
1817    // chrono's RFC3339 output format matches s3s' parser ("...Z" with
1818    // optional sub-second precision). Fall back to UNIX_EPOCH if anything
1819    // unexpected happens — we never produce malformed strings, so this
1820    // branch is unreachable in practice.
1821    let s = dt.to_rfc3339_opts(chrono::SecondsFormat::Millis, true);
1822    Timestamp::parse(s3s::dto::TimestampFormat::DateTime, &s).unwrap_or_default()
1823}
1824
1825/// v0.6 #39: convert our internal [`crate::tagging::TagSet`] into the
1826/// s3s `Vec<Tag>` wire shape used on `GetObject/BucketTaggingOutput`.
1827/// Both halves of every pair land in the `Some(_)` slot — AWS marks
1828/// the field optional but always populates it on response.
1829fn tagset_to_aws(set: &crate::tagging::TagSet) -> Vec<Tag> {
1830    set.iter()
1831        .map(|(k, v)| Tag {
1832            key: Some(k.clone()),
1833            value: Some(v.clone()),
1834        })
1835        .collect()
1836}
1837
1838/// v0.6 #39: inverse of [`tagset_to_aws`] for input handlers. Missing
1839/// keys / values become empty strings (mirrors AWS, which rejects
1840/// `<Key/>` with InvalidTag at the parser layer; downstream
1841/// `TagSet::validate` then enforces our size limits).
1842fn aws_to_tagset(tags: &[Tag]) -> Result<crate::tagging::TagSet, crate::tagging::TagError> {
1843    let pairs = tags
1844        .iter()
1845        .map(|t| {
1846            (
1847                t.key.clone().unwrap_or_default(),
1848                t.value.clone().unwrap_or_default(),
1849            )
1850        })
1851        .collect();
1852    crate::tagging::TagSet::from_pairs(pairs)
1853}
1854
1855/// `Range` request を decompressed object サイズ `total` に適用して `(start, end_exclusive)`
1856/// を返す。`Range::Int { first, last }` は `bytes=first-last` (last は inclusive)、
1857/// `Range::Suffix { length }` は末尾 `length` byte。S3 仕様に準拠。
1858pub fn resolve_range(range: &s3s::dto::Range, total: u64) -> Result<(u64, u64), String> {
1859    if total == 0 {
1860        return Err("cannot range-get zero-length object".into());
1861    }
1862    match range {
1863        s3s::dto::Range::Int { first, last } => {
1864            let start = *first;
1865            let end_inclusive = match last {
1866                Some(l) => (*l).min(total - 1),
1867                None => total - 1,
1868            };
1869            if start > end_inclusive || start >= total {
1870                return Err(format!(
1871                    "range bytes={start}-{:?} out of object size {total}",
1872                    last
1873                ));
1874            }
1875            Ok((start, end_inclusive + 1))
1876        }
1877        s3s::dto::Range::Suffix { length } => {
1878            let len = (*length).min(total);
1879            Ok((total - len, total))
1880        }
1881    }
1882}
1883
1884#[async_trait::async_trait]
1885impl<B: S3> S3 for S4Service<B> {
1886    // === 圧縮を挟む path (PUT) ===
1887    #[tracing::instrument(
1888        name = "s4.put_object",
1889        skip(self, req),
1890        fields(bucket = %req.input.bucket, key = %req.input.key, codec, bytes_in, bytes_out, latency_ms)
1891    )]
1892    async fn put_object(
1893        &self,
1894        mut req: S3Request<PutObjectInput>,
1895    ) -> S3Result<S3Response<PutObjectOutput>> {
1896        let put_start = Instant::now();
1897        let put_bucket = req.input.bucket.clone();
1898        let put_key = req.input.key.clone();
1899        let access_preamble = self.access_log_preamble(&req);
1900        self.enforce_rate_limit(&req, &put_bucket)?;
1901        // v0.6 #39: parse `x-amz-tagging` (URL-encoded query string) so
1902        // the IAM policy gate sees the request's tags via
1903        // `s3:RequestObjectTag/<key>`. `existing_object_tags` is also
1904        // resolved from the Tagging manager (when wired) so
1905        // `s3:ExistingObjectTag/<key>` works on overwrite.
1906        let request_tags: Option<crate::tagging::TagSet> = req
1907            .input
1908            .tagging
1909            .as_deref()
1910            .map(crate::tagging::parse_tagging_header)
1911            .transpose()
1912            .map_err(|e| S3Error::with_message(S3ErrorCode::InvalidArgument, e.to_string()))?;
1913        let existing_tags: Option<crate::tagging::TagSet> = self
1914            .tagging
1915            .as_ref()
1916            .and_then(|m| m.get_object_tags(&put_bucket, &put_key));
1917        self.enforce_policy_with_extra(
1918            &req,
1919            "s3:PutObject",
1920            &put_bucket,
1921            Some(&put_key),
1922            request_tags.as_ref(),
1923            existing_tags.as_ref(),
1924        )?;
1925        // v0.5 #30: an Object Lock-protected key cannot be overwritten by
1926        // a non-versioned PUT (Suspended / Unversioned bucket). Enabled
1927        // bucket PUTs are exempt because they materialise a fresh
1928        // version under a shadow key (`<key>.__s4ver__/<vid>`) — the
1929        // locked version's bytes are untouched. The check mirrors the
1930        // delete path (Compliance never bypassable, Governance via the
1931        // bypass header, legal hold never).
1932        if let Some(mgr) = self.object_lock.as_ref()
1933            && let Some(state) = mgr.get(&put_bucket, &put_key)
1934        {
1935            let bucket_versioned_enabled = self
1936                .versioning
1937                .as_ref()
1938                .map(|v| v.state(&put_bucket) == crate::versioning::VersioningState::Enabled)
1939                .unwrap_or(false);
1940            if !bucket_versioned_enabled {
1941                let bypass = parse_bypass_governance_header(&req.headers);
1942                let now = chrono::Utc::now();
1943                if !state.can_delete(now, bypass) {
1944                    crate::metrics::record_policy_denial("s3:PutObject", &put_bucket);
1945                    return Err(S3Error::with_message(
1946                        S3ErrorCode::AccessDenied,
1947                        "Access Denied because object protected by object lock",
1948                    ));
1949                }
1950            }
1951        }
1952        // v0.5 #30: per-PUT explicit retention / legal hold (S3
1953        // `x-amz-object-lock-mode`, `x-amz-object-lock-retain-until-date`,
1954        // `x-amz-object-lock-legal-hold`). Captured before the body
1955        // moves into the backend; persisted into the manager only on
1956        // backend success below.
1957        let explicit_lock_mode: Option<crate::object_lock::LockMode> = req
1958            .input
1959            .object_lock_mode
1960            .as_ref()
1961            .and_then(|m| crate::object_lock::LockMode::from_aws_str(m.as_str()));
1962        let explicit_retain_until: Option<chrono::DateTime<chrono::Utc>> = req
1963            .input
1964            .object_lock_retain_until_date
1965            .as_ref()
1966            .and_then(timestamp_to_chrono_utc);
1967        let explicit_legal_hold_on: Option<bool> = req
1968            .input
1969            .object_lock_legal_hold_status
1970            .as_ref()
1971            .map(|s| s.as_str().eq_ignore_ascii_case("ON"));
1972        if let Some(blob) = req.input.body.take() {
1973            // Sample 4 KiB から codec を決定。streaming-aware codec なら streaming
1974            // compress fast path、そうでなければ従来の collect-then-compress。
1975            let (sample, rest_stream) = peek_sample(blob, SAMPLE_BYTES)
1976                .await
1977                .map_err(internal("peek put sample"))?;
1978            let sample_len = sample.len().min(SAMPLE_BYTES);
1979            // v0.8 #56: pass the request's Content-Length (when present) so
1980            // the sampling dispatcher can promote large objects to a GPU
1981            // codec. Chunked transfers (no Content-Length) keep CPU.
1982            let total_size_hint = req.input.content_length.and_then(|n| u64::try_from(n).ok());
1983            let kind = self
1984                .dispatcher
1985                .pick_with_size_hint(&sample[..sample_len], total_size_hint)
1986                .await;
1987
1988            // Passthrough buys nothing from S4F2 wrapping (no compression =
1989            // no per-chunk frame to skip past) and the +28-byte header
1990            // overhead breaks size-sensitive callers that expect a true
1991            // pass-through. So passthrough always uses the legacy raw-blob
1992            // path; only compressing codecs go through the framed path.
1993            let use_framed = supports_streaming_compress(kind) && kind != CodecKind::Passthrough;
1994            let (compressed, manifest, is_framed) = if use_framed {
1995                // streaming fast path: input は memory に collect しない
1996                let chained = chain_sample_with_rest(sample, rest_stream);
1997                debug!(
1998                    bucket = ?req.input.bucket,
1999                    key = ?req.input.key,
2000                    codec = kind.as_str(),
2001                    path = "streaming-framed",
2002                    "S4 put_object: compressing (streaming, S4F2 multi-frame)"
2003                );
2004                // v0.4 #16: pick the chunk size based on the request's
2005                // Content-Length when known, falling back to the 4 MiB
2006                // default for chunked transfers.
2007                let chunk_size = pick_chunk_size(req.input.content_length.map(|n| n as u64));
2008                // v0.8.4 #73 M2: pass the request's Content-Length so
2009                // streaming_compress_to_frames can fail-fast on a mid-PUT
2010                // truncation (client disconnect after sending half the
2011                // body). `None` is the chunked-Transfer-Encoding case
2012                // where the upstream genuinely doesn't know the size and
2013                // the backend's framing layer is the only truncation
2014                // signal we have.
2015                let expected_input_size =
2016                    req.input.content_length.and_then(|n| u64::try_from(n).ok());
2017                let (body, manifest) = streaming_compress_to_frames(
2018                    chained,
2019                    Arc::clone(&self.registry),
2020                    kind,
2021                    chunk_size,
2022                    expected_input_size,
2023                )
2024                .await
2025                .map_err(|e| match e {
2026                    s4_codec::CodecError::TruncatedStream { expected, got } => {
2027                        // 400 IncompleteBody: client advertised N bytes
2028                        // but disconnected after `got`. Mirrors AWS S3's
2029                        // canonical error code for the same shape so SDK
2030                        // retries kick in instead of treating the PUT as
2031                        // a successful upload of a half-body.
2032                        S3Error::with_message(
2033                            S3ErrorCode::IncompleteBody,
2034                            format!("PUT body truncated: expected {expected} bytes, got {got}"),
2035                        )
2036                    }
2037                    other => internal("streaming framed compress")(other),
2038                })?;
2039                (body, manifest, true)
2040            } else {
2041                // GPU codec 等で streaming-aware でないものは bytes-buffered path
2042                // (raw 圧縮 bytes、framed なし — back-compat 互換 path)
2043                let bytes = collect_with_sample(sample, rest_stream, self.max_body_bytes)
2044                    .await
2045                    .map_err(internal("collect put body (buffered path)"))?;
2046                debug!(
2047                    bucket = ?req.input.bucket,
2048                    key = ?req.input.key,
2049                    bytes = bytes.len(),
2050                    codec = kind.as_str(),
2051                    path = "buffered",
2052                    "S4 put_object: compressing (buffered, raw blob)"
2053                );
2054                // v0.8 #55: telemetry-returning compress so we can stamp
2055                // GPU-pipeline Prometheus metrics (`s4_gpu_compress_seconds`,
2056                // throughput gauge, OOM counter) for nvcomp / dietgpu codecs.
2057                // CPU codecs come back with `gpu_seconds = None` and the
2058                // stamp helper short-circuits — no extra cost on CPU path.
2059                let (compress_res, tel) = self.registry.compress_with_telemetry(bytes, kind).await;
2060                stamp_gpu_compress_telemetry(&tel);
2061                let (body, m) = compress_res.map_err(internal("registry compress"))?;
2062                (body, m, false)
2063            };
2064
2065            write_manifest(&mut req.input.metadata, &manifest);
2066            if is_framed {
2067                // v0.2 #4: framed body であることを GET 側に伝える meta flag。
2068                req.input
2069                    .metadata
2070                    .get_or_insert_with(Default::default)
2071                    .insert(META_FRAMED.into(), "true".into());
2072            }
2073            // 重要: content_length を圧縮後サイズで更新する。
2074            // これを忘れると下流 (aws-sdk-s3 → S3) が宣言サイズ分の bytes を
2075            // 待ち続けて RequestTimeout で失敗する (S3 仕様)。
2076            req.input.content_length = Some(compressed.len() as i64);
2077            // body を書き換えたので、客側が送ってきた original body 用の
2078            // checksum / MD5 ヘッダは無効化する (そのまま転送すると下流 S3 が
2079            // XAmzContentChecksumMismatch を返す)。S4 自身の整合性は
2080            // ChunkManifest.crc32c で担保している。
2081            req.input.checksum_algorithm = None;
2082            req.input.checksum_crc32 = None;
2083            req.input.checksum_crc32c = None;
2084            req.input.checksum_crc64nvme = None;
2085            req.input.checksum_sha1 = None;
2086            req.input.checksum_sha256 = None;
2087            req.input.content_md5 = None;
2088            let original_size = manifest.original_size;
2089            let compressed_size = manifest.compressed_size;
2090            let codec_label = manifest.codec.as_str();
2091            // framed body は GET 側で sidecar partial-fetch を効かせるため
2092            // build_index_from_body で sidecar を組み立てて backend に PUT する。
2093            let sidecar_index = if is_framed {
2094                s4_codec::index::build_index_from_body(&compressed).ok()
2095            } else {
2096                None
2097            };
2098            // v0.4 #21 / v0.5 #29 / v0.5 #27: encrypt-after-compress.
2099            // Precedence:
2100            //   - SSE-C headers present → per-request customer key (S4E3)
2101            //   - server-managed keyring configured → active key (S4E2)
2102            //   - neither → no encryption (raw compressed body)
2103            // The `s4-encrypted: aes-256-gcm` metadata flag is set in
2104            // both encrypted modes; the on-disk frame magic distinguishes
2105            // S4E1 / S4E2 / S4E3 so GET picks the right decrypt path.
2106            // v0.7 #48 BUG-2/3 fix: take() the SSE fields off req.input
2107            // so the encryption headers are NOT forwarded to the
2108            // backend. S4 owns the encrypt-then-store contract; if we
2109            // leave the headers in place, real S3-compat backends
2110            // (MinIO / AWS) try to apply their own SSE on top and
2111            // either reject (MinIO requires HTTPS for SSE-C) or fail
2112            // (MinIO has no KMS configured). MemoryBackend ignored
2113            // these so mock tests passed.
2114            let sse_c_alg = req.input.sse_customer_algorithm.take();
2115            let sse_c_key = req.input.sse_customer_key.take();
2116            let sse_c_md5 = req.input.sse_customer_key_md5.take();
2117            let sse_header = req.input.server_side_encryption.take();
2118            let sse_kms_key = req.input.ssekms_key_id.take();
2119            let sse_c_material = extract_sse_c_material(&sse_c_alg, &sse_c_key, &sse_c_md5)?;
2120            // v0.5 #28: SSE-KMS request? Resolves to None unless the
2121            // request asks for `aws:kms` AND a key id is available
2122            // (explicit header or gateway default). When set, we'll
2123            // generate a per-object DEK below.
2124            let kms_key_id = extract_kms_key_id(
2125                &sse_header,
2126                &sse_kms_key,
2127                self.kms_default_key_id.as_deref(),
2128            );
2129            // v0.5 #32: in compliance-strict mode, every PUT must
2130            // declare SSE — either client-supplied (SSE-C), KMS, or by
2131            // virtue of a server-side keyring being configured (which
2132            // applies SSE-S4 to every PUT automatically). Requests that
2133            // would otherwise land as plain compressed bytes are
2134            // rejected with 400 InvalidRequest.
2135            if self.compliance_strict
2136                && sse_c_material.is_none()
2137                && kms_key_id.is_none()
2138                && self.sse_keyring.is_none()
2139                && sse_header.as_ref().map(|s| s.as_str()) != Some(ServerSideEncryption::AES256)
2140            {
2141                return Err(S3Error::with_message(
2142                    S3ErrorCode::InvalidRequest,
2143                    "compliance-mode strict: PUT must include x-amz-server-side-encryption \
2144                     (AES256 or aws:kms) or x-amz-server-side-encryption-customer-* headers",
2145                ));
2146            }
2147            // SSE-C and SSE-KMS are mutually exclusive on a single PUT
2148            // (AWS S3 returns 400 InvalidArgument). SSE-C wins by spec.
2149            if sse_c_material.is_some() && kms_key_id.is_some() {
2150                return Err(S3Error::with_message(
2151                    S3ErrorCode::InvalidArgument,
2152                    "SSE-C and SSE-KMS cannot be used together on the same PUT",
2153                ));
2154            }
2155            // KMS path needs to call generate_dek().await before the
2156            // body_to_send branch; capture the result here.
2157            //
2158            // v0.8.1 #58: the plaintext DEK lives in three places
2159            // during one PUT:
2160            //
2161            //   1. The `Zeroizing<Vec<u8>>` returned by `generate_dek`
2162            //      — wiped when the binding `dek` falls out of scope at
2163            //      the end of this `if`-arm.
2164            //   2. The stack `[u8; 32]` we copy into for `SseSource::Kms`
2165            //      — wrapped in `Zeroizing<[u8; 32]>` so it's wiped when
2166            //      the outer `kms_wrap` `Option` is dropped at the end
2167            //      of `put_object`.
2168            //   3. AES-GCM internal key state inside the `aes-gcm`
2169            //      crate during `encrypt_with_source` — out of scope
2170            //      for this fix; tracked separately in v0.8.2.
2171            let kms_wrap: Option<(zeroize::Zeroizing<[u8; 32]>, crate::kms::WrappedDek)> =
2172                if let Some(ref key_id) = kms_key_id {
2173                    let kms = self.kms.as_ref().ok_or_else(|| {
2174                    S3Error::with_message(
2175                        S3ErrorCode::InvalidRequest,
2176                        "SSE-KMS requested but no --kms-local-dir / --kms-aws-region is configured on this gateway",
2177                    )
2178                })?;
2179                    // `dek` is `Zeroizing<Vec<u8>>`; deref + slice access
2180                    // works unchanged via `Deref<Target=Vec<u8>>`.
2181                    let (dek, wrapped) = kms.generate_dek(key_id).await.map_err(kms_error_to_s3)?;
2182                    if dek.len() != 32 {
2183                        return Err(S3Error::with_message(
2184                            S3ErrorCode::InternalError,
2185                            format!(
2186                                "KMS backend returned a DEK of {} bytes (expected 32)",
2187                                dek.len()
2188                            ),
2189                        ));
2190                    }
2191                    let mut dek_arr: zeroize::Zeroizing<[u8; 32]> =
2192                        zeroize::Zeroizing::new([0u8; 32]);
2193                    dek_arr.copy_from_slice(&dek);
2194                    // `dek` (the `Zeroizing<Vec<u8>>`) is dropped at the
2195                    // end of this scope, wiping the heap allocation.
2196                    Some((dek_arr, wrapped))
2197                } else {
2198                    None
2199                };
2200            // v0.7 #48 BUG-4 fix: stamp the SSE *type* into metadata
2201            // alongside `s4-encrypted` so HEAD (which doesn't fetch the
2202            // body) can echo the correct `x-amz-server-side-encryption`
2203            // value. Without this, HEAD on an SSE-KMS object would not
2204            // echo `aws:kms` because the frame magic is only available
2205            // on the body (which HEAD doesn't read).
2206            let body_to_send = if let Some(ref m) = sse_c_material {
2207                let meta = req.input.metadata.get_or_insert_with(Default::default);
2208                meta.insert("s4-encrypted".into(), "aes-256-gcm".into());
2209                meta.insert("s4-sse-type".into(), "AES256".into());
2210                meta.insert(
2211                    "s4-sse-c-key-md5".into(),
2212                    base64::engine::general_purpose::STANDARD.encode(m.key_md5),
2213                );
2214                crate::sse::encrypt_with_source(
2215                    &compressed,
2216                    crate::sse::SseSource::CustomerKey {
2217                        key: &m.key,
2218                        key_md5: &m.key_md5,
2219                    },
2220                )
2221            } else if let Some((ref dek, ref wrapped)) = kms_wrap {
2222                let meta = req.input.metadata.get_or_insert_with(Default::default);
2223                meta.insert("s4-encrypted".into(), "aes-256-gcm".into());
2224                meta.insert("s4-sse-type".into(), "aws:kms".into());
2225                meta.insert("s4-sse-kms-key-id".into(), wrapped.key_id.clone());
2226                // v0.8.1 #58: `dek` is `&Zeroizing<[u8; 32]>`; `SseSource::Kms`
2227                // wants `&[u8; 32]`. Rust auto-derefs `&Zeroizing<T>` to
2228                // `&T` here via `Deref<Target=T>`, so the binding picks
2229                // up the inner array reference without copying. The array
2230                // stays in the `Zeroizing` wrapper that owns it and gets
2231                // wiped when `kms_wrap` drops at the end of `put_object`.
2232                let dek_ref: &[u8; 32] = dek;
2233                crate::sse::encrypt_with_source(
2234                    &compressed,
2235                    crate::sse::SseSource::Kms {
2236                        dek: dek_ref,
2237                        wrapped,
2238                    },
2239                )
2240            } else if let Some(keyring) = self.sse_keyring.as_ref() {
2241                // SSE-S4 is server-driven transparent encryption; the
2242                // client didn't ask for SSE. We stamp `s4-encrypted`
2243                // (internal flag the GET path needs) but deliberately
2244                // do NOT stamp `s4-sse-type` — that lights up the HEAD
2245                // echo of `x-amz-server-side-encryption: AES256`,
2246                // which would falsely advertise AWS-style SSE-S3
2247                // semantics the operator didn't request.
2248                let meta = req.input.metadata.get_or_insert_with(Default::default);
2249                meta.insert("s4-encrypted".into(), "aes-256-gcm".into());
2250                // v0.8 #52: when `--sse-chunk-size > 0` is configured,
2251                // emit the chunked S4E5 frame so the matching GET can
2252                // stream-decrypt instead of buffering 5 GiB before
2253                // emitting a byte. Falls back to the buffered S4E2
2254                // frame at chunk_size=0 (default) so existing
2255                // deployments are bit-for-bit unchanged.
2256                if self.sse_chunk_size > 0 {
2257                    crate::sse::encrypt_v2_chunked(&compressed, keyring, self.sse_chunk_size)
2258                        .map_err(|e| {
2259                            S3Error::with_message(
2260                                S3ErrorCode::InternalError,
2261                                format!("SSE-S4 chunked encrypt failed: {e}"),
2262                            )
2263                        })?
2264                } else {
2265                    crate::sse::encrypt_v2(&compressed, keyring)
2266                }
2267            } else {
2268                compressed.clone()
2269            };
2270            // v0.6 #40: capture the about-to-be-sent body + metadata so
2271            // the replication dispatcher (run after the source PUT
2272            // succeeds) can hand the same backend bytes to the
2273            // destination bucket. `Bytes` clone is cheap (refcounted).
2274            let replication_body = body_to_send.clone();
2275            let replication_metadata = req.input.metadata.clone();
2276            // v0.7 #48 BUG-1 fix: SSE encryption (S4E1/E2/E3/E4 frames)
2277            // makes the body longer than the post-compression bytes
2278            // (header + nonce + tag overhead). The earlier
2279            // content_length stamp at compressed.len() is now stale, so
2280            // re-stamp from the actual bytes about to be sent or the
2281            // backend (real S3 / MinIO) rejects with
2282            // `StreamLengthMismatch`. MemoryBackend never validated
2283            // this, which is why mock-only tests passed.
2284            req.input.content_length = Some(body_to_send.len() as i64);
2285            req.input.body = Some(bytes_to_blob(body_to_send));
2286            // v0.5 #34: pre-allocate a version-id when the bucket is
2287            // Enabled, then redirect the backend storage key to the
2288            // shadow path so older versions survive newer PUTs.
2289            // Suspended / Unversioned buckets keep using the plain
2290            // `<key>` (S3 spec: Suspended overwrites the same backend
2291            // object). Pre-allocation (instead of recording after PUT)
2292            // ensures the shadow key + the response's
2293            // `x-amz-version-id` use the same vid.
2294            let pending_version: Option<crate::versioning::PutOutcome> = self
2295                .versioning
2296                .as_ref()
2297                .map(|mgr| mgr.state(&put_bucket))
2298                .map(|state| match state {
2299                    crate::versioning::VersioningState::Enabled => crate::versioning::PutOutcome {
2300                        version_id: crate::versioning::VersioningManager::new_version_id(),
2301                        versioned_response: true,
2302                    },
2303                    crate::versioning::VersioningState::Suspended
2304                    | crate::versioning::VersioningState::Unversioned => {
2305                        crate::versioning::PutOutcome {
2306                            version_id: crate::versioning::NULL_VERSION_ID.to_owned(),
2307                            versioned_response: false,
2308                        }
2309                    }
2310                });
2311            if let Some(ref pv) = pending_version
2312                && pv.versioned_response
2313            {
2314                req.input.key = versioned_shadow_key(&put_key, &pv.version_id);
2315            }
2316            // v0.8.4 #73 H-2: capture the to-be-stored body length BEFORE
2317            // the move into `req.input` is consumed by the backend call.
2318            // The sidecar's `source_compressed_size` is checked against
2319            // the live HEAD `Content-Length` on Range GET to detect a
2320            // backend-side mutation.
2321            let backend_object_size = req.input.content_length.and_then(|n| u64::try_from(n).ok());
2322            let mut backend_resp = self.backend.put_object(req).await;
2323            if let Some(mut idx) = sidecar_index
2324                && let Ok(ref resp) = backend_resp
2325                && idx.entries.len() > 1
2326            {
2327                // 1 chunk しかない (small object) なら sidecar は意味がない (=
2328                // partial fetch しても full body と同じ範囲) ので省略。
2329                // Sidecar は user-visible key で書く (latest version の
2330                // partial fetch path 用)。Old versions の Range GET は今 task
2331                // の scope 外 (full read fallback でも意味的には正しい)。
2332                //
2333                // v0.8.4 #73 H-2: stamp the version-binding fields the
2334                // GET path needs to detect a stale / attacker-written
2335                // sidecar. ETag comes from the backend's PUT response —
2336                // when missing (some backends don't return an ETag) we
2337                // synthesize a CRC-derived stable identifier so the
2338                // sidecar still binds to *something*; the GET HEAD will
2339                // see the same backend ETag (None vs None) and treat the
2340                // pair as consistent.
2341                let source_etag = resp.output.e_tag.as_ref().map(|t| t.value().to_string());
2342                idx.source_etag = source_etag;
2343                idx.source_compressed_size = backend_object_size;
2344                self.write_sidecar(&put_bucket, &put_key, &idx).await;
2345            }
2346            // v0.5 #34: commit the new version into the manager only on
2347            // backend success. Use the pre-allocated vid so the response
2348            // header and the chain entry agree.
2349            if let (Some(mgr), Some(pv), Ok(resp)) = (
2350                self.versioning.as_ref(),
2351                pending_version.as_ref(),
2352                backend_resp.as_mut(),
2353            ) {
2354                let etag = resp
2355                    .output
2356                    .e_tag
2357                    .clone()
2358                    .map(ETag::into_value)
2359                    .unwrap_or_else(|| format!("\"crc32c-{}\"", manifest.crc32c));
2360                let now = chrono::Utc::now();
2361                mgr.commit_put_with_version(
2362                    &put_bucket,
2363                    &put_key,
2364                    crate::versioning::VersionEntry {
2365                        version_id: pv.version_id.clone(),
2366                        etag,
2367                        size: original_size,
2368                        is_delete_marker: false,
2369                        created_at: now,
2370                    },
2371                );
2372                if pv.versioned_response {
2373                    resp.output.version_id = Some(pv.version_id.clone());
2374                }
2375            }
2376            // v0.5 #27: AWS S3 echoes the SSE-C headers back on success
2377            // so the client knows the server actually applied the
2378            // requested algorithm and which key fingerprint matched.
2379            if let (Some(m), Ok(resp)) = (sse_c_material.as_ref(), backend_resp.as_mut()) {
2380                resp.output.sse_customer_algorithm = Some(crate::sse::SSE_C_ALGORITHM.into());
2381                resp.output.sse_customer_key_md5 =
2382                    Some(base64::engine::general_purpose::STANDARD.encode(m.key_md5));
2383            }
2384            // v0.5 #28: SSE-KMS echo — `aws:kms` + the canonical key id
2385            // the backend returned (AWS KMS returns the ARN even when
2386            // the request used an alias).
2387            if let (Some((_, wrapped)), Ok(resp)) = (kms_wrap.as_ref(), backend_resp.as_mut()) {
2388                resp.output.server_side_encryption = Some(ServerSideEncryption::from_static(
2389                    ServerSideEncryption::AWS_KMS,
2390                ));
2391                resp.output.ssekms_key_id = Some(wrapped.key_id.clone());
2392            }
2393            // v0.5 #30: persist any per-PUT explicit retention / legal
2394            // hold the client supplied, then auto-apply the bucket
2395            // default (no-op when state is already populated). The
2396            // explicit fields take precedence — the bucket-default
2397            // helper bails out as soon as it sees any retention.
2398            if let (Some(mgr), Ok(_)) = (self.object_lock.as_ref(), backend_resp.as_ref()) {
2399                if explicit_lock_mode.is_some()
2400                    || explicit_retain_until.is_some()
2401                    || explicit_legal_hold_on.is_some()
2402                {
2403                    let mut state = mgr.get(&put_bucket, &put_key).unwrap_or_default();
2404                    if let Some(m) = explicit_lock_mode {
2405                        state.mode = Some(m);
2406                    }
2407                    if let Some(u) = explicit_retain_until {
2408                        state.retain_until = Some(u);
2409                    }
2410                    if let Some(lh) = explicit_legal_hold_on {
2411                        state.legal_hold_on = lh;
2412                    }
2413                    mgr.set(&put_bucket, &put_key, state);
2414                }
2415                mgr.apply_default_on_put(&put_bucket, &put_key, chrono::Utc::now());
2416            }
2417            let _ = (original_size, compressed_size); // mute unused warnings
2418            let elapsed = put_start.elapsed();
2419            crate::metrics::record_put(
2420                codec_label,
2421                original_size,
2422                compressed_size,
2423                elapsed.as_secs_f64(),
2424                backend_resp.is_ok(),
2425            );
2426            // v0.4 #20: structured access-log entry (best-effort).
2427            self.record_access(
2428                access_preamble,
2429                "REST.PUT.OBJECT",
2430                &put_bucket,
2431                Some(&put_key),
2432                if backend_resp.is_ok() { 200 } else { 500 },
2433                compressed_size,
2434                original_size,
2435                elapsed.as_millis() as u64,
2436                backend_resp.as_ref().err().map(|e| e.code().as_str()),
2437            )
2438            .await;
2439            info!(
2440                op = "put_object",
2441                bucket = %put_bucket,
2442                key = %put_key,
2443                codec = codec_label,
2444                bytes_in = original_size,
2445                bytes_out = compressed_size,
2446                ratio = format!(
2447                    "{:.3}",
2448                    if original_size == 0 { 1.0 } else { compressed_size as f64 / original_size as f64 }
2449                ),
2450                latency_ms = elapsed.as_millis() as u64,
2451                ok = backend_resp.is_ok(),
2452                "S4 put completed"
2453            );
2454            // v0.6 #35: fire bucket-notification destinations (best-effort,
2455            // detached). Skipped when no manager is attached or when the
2456            // bucket has no rule matching `s3:ObjectCreated:Put` for this
2457            // key.
2458            if backend_resp.is_ok()
2459                && let Some(mgr) = self.notifications.as_ref()
2460            {
2461                let dests = mgr.match_destinations(
2462                    &put_bucket,
2463                    &crate::notifications::EventType::ObjectCreatedPut,
2464                    &put_key,
2465                );
2466                if !dests.is_empty() {
2467                    let etag = backend_resp
2468                        .as_ref()
2469                        .ok()
2470                        .and_then(|r| r.output.e_tag.clone())
2471                        .map(ETag::into_value);
2472                    let version_id = pending_version
2473                        .as_ref()
2474                        .filter(|pv| pv.versioned_response)
2475                        .map(|pv| pv.version_id.clone());
2476                    tokio::spawn(crate::notifications::dispatch_event(
2477                        Arc::clone(mgr),
2478                        put_bucket.clone(),
2479                        put_key.clone(),
2480                        crate::notifications::EventType::ObjectCreatedPut,
2481                        Some(original_size),
2482                        etag,
2483                        version_id,
2484                        format!("S4-{}", uuid::Uuid::new_v4()),
2485                    ));
2486                }
2487            }
2488            // v0.6 #39: persist parsed `x-amz-tagging` tags into the
2489            // tagging manager on a successful PUT. AWS PutObject's
2490            // tagging is a full-replace operation (not a merge), so
2491            // any pre-existing entry for `(bucket, key)` is overwritten.
2492            if backend_resp.is_ok()
2493                && let (Some(mgr), Some(tags)) = (self.tagging.as_ref(), request_tags.clone())
2494            {
2495                mgr.put_object_tags(&put_bucket, &put_key, tags);
2496            }
2497            // v0.6 #40: cross-bucket replication fire-point. On
2498            // successful source PUT, consult the replication manager;
2499            // when an enabled rule matches, mark the source key
2500            // `Pending` and spawn a detached task that PUTs the same
2501            // backend bytes + metadata to the rule's destination
2502            // bucket. The dispatcher itself records `Completed` /
2503            // `Failed` and bumps the drop counter on retry-budget
2504            // exhaustion.
2505            self.spawn_replication_if_matched(
2506                &put_bucket,
2507                &put_key,
2508                &request_tags,
2509                &replication_body,
2510                &replication_metadata,
2511                backend_resp.is_ok(),
2512                pending_version.as_ref(),
2513            );
2514            return backend_resp;
2515        }
2516        // Body-less PUT (rare: zero-length object). Mirror the body-full
2517        // versioning hooks so list_object_versions / GET-by-version still see
2518        // empty-body objects in the chain.
2519        let pending_version: Option<crate::versioning::PutOutcome> = self
2520            .versioning
2521            .as_ref()
2522            .map(|mgr| mgr.state(&put_bucket))
2523            .map(|state| match state {
2524                crate::versioning::VersioningState::Enabled => crate::versioning::PutOutcome {
2525                    version_id: crate::versioning::VersioningManager::new_version_id(),
2526                    versioned_response: true,
2527                },
2528                _ => crate::versioning::PutOutcome {
2529                    version_id: crate::versioning::NULL_VERSION_ID.to_owned(),
2530                    versioned_response: false,
2531                },
2532            });
2533        if let Some(ref pv) = pending_version
2534            && pv.versioned_response
2535        {
2536            req.input.key = versioned_shadow_key(&put_key, &pv.version_id);
2537        }
2538        let mut backend_resp = self.backend.put_object(req).await;
2539        if let (Some(mgr), Some(pv), Ok(resp)) = (
2540            self.versioning.as_ref(),
2541            pending_version.as_ref(),
2542            backend_resp.as_mut(),
2543        ) {
2544            let etag = resp
2545                .output
2546                .e_tag
2547                .clone()
2548                .map(ETag::into_value)
2549                .unwrap_or_default();
2550            let now = chrono::Utc::now();
2551            mgr.commit_put_with_version(
2552                &put_bucket,
2553                &put_key,
2554                crate::versioning::VersionEntry {
2555                    version_id: pv.version_id.clone(),
2556                    etag,
2557                    size: 0,
2558                    is_delete_marker: false,
2559                    created_at: now,
2560                },
2561            );
2562            if pv.versioned_response {
2563                resp.output.version_id = Some(pv.version_id.clone());
2564            }
2565        }
2566        // v0.5 #30: same explicit-then-default lock-state commit as the
2567        // body-bearing branch above, so a zero-length PUT also picks up
2568        // bucket-default retention.
2569        if let (Some(mgr), Ok(_)) = (self.object_lock.as_ref(), backend_resp.as_ref()) {
2570            if explicit_lock_mode.is_some()
2571                || explicit_retain_until.is_some()
2572                || explicit_legal_hold_on.is_some()
2573            {
2574                let mut state = mgr.get(&put_bucket, &put_key).unwrap_or_default();
2575                if let Some(m) = explicit_lock_mode {
2576                    state.mode = Some(m);
2577                }
2578                if let Some(u) = explicit_retain_until {
2579                    state.retain_until = Some(u);
2580                }
2581                if let Some(lh) = explicit_legal_hold_on {
2582                    state.legal_hold_on = lh;
2583                }
2584                mgr.set(&put_bucket, &put_key, state);
2585            }
2586            mgr.apply_default_on_put(&put_bucket, &put_key, chrono::Utc::now());
2587        }
2588        // v0.6 #35: same notification fire-point as the body-bearing PUT
2589        // branch above (zero-length objects still match `ObjectCreated:Put`
2590        // rules per the AWS event taxonomy).
2591        if backend_resp.is_ok()
2592            && let Some(mgr) = self.notifications.as_ref()
2593        {
2594            let dests = mgr.match_destinations(
2595                &put_bucket,
2596                &crate::notifications::EventType::ObjectCreatedPut,
2597                &put_key,
2598            );
2599            if !dests.is_empty() {
2600                let etag = backend_resp
2601                    .as_ref()
2602                    .ok()
2603                    .and_then(|r| r.output.e_tag.clone())
2604                    .map(ETag::into_value);
2605                let version_id = pending_version
2606                    .as_ref()
2607                    .filter(|pv| pv.versioned_response)
2608                    .map(|pv| pv.version_id.clone());
2609                tokio::spawn(crate::notifications::dispatch_event(
2610                    Arc::clone(mgr),
2611                    put_bucket.clone(),
2612                    put_key.clone(),
2613                    crate::notifications::EventType::ObjectCreatedPut,
2614                    Some(0),
2615                    etag,
2616                    version_id,
2617                    format!("S4-{}", uuid::Uuid::new_v4()),
2618                ));
2619            }
2620        }
2621        // v0.6 #39: persist parsed `x-amz-tagging` for the body-less
2622        // (zero-length) PUT branch too — same shape as the body-bearing
2623        // branch above.
2624        if backend_resp.is_ok()
2625            && let (Some(mgr), Some(tags)) = (self.tagging.as_ref(), request_tags.clone())
2626        {
2627            mgr.put_object_tags(&put_bucket, &put_key, tags);
2628        }
2629        // v0.6 #40: cross-bucket replication for the zero-length PUT
2630        // branch — same shape as the body-bearing branch above.
2631        // v0.8.2 #61: pass `pending_version` so a versioned source's
2632        // destination receives the same shadow-key path.
2633        self.spawn_replication_if_matched(
2634            &put_bucket,
2635            &put_key,
2636            &request_tags,
2637            &bytes::Bytes::new(),
2638            &None,
2639            backend_resp.is_ok(),
2640            pending_version.as_ref(),
2641        );
2642        backend_resp
2643    }
2644
    // === Decompression path (GET) ===
    //
    // Order of operations: rate-limit/policy gates → versioning resolution
    // (possibly rewriting the backend key to a shadow path) → sidecar-index
    // partial Range fast path → backend GET → SSE decrypt (if flagged) →
    // streaming-zstd / passthrough fast paths → buffered decompress + Range
    // slice fallback. Each early `return Ok(resp)` is a completed fast path.
    #[tracing::instrument(
        name = "s4.get_object",
        skip(self, req),
        fields(bucket = %req.input.bucket, key = %req.input.key, codec, bytes_out, range, path)
    )]
    async fn get_object(
        &self,
        mut req: S3Request<GetObjectInput>,
    ) -> S3Result<S3Response<GetObjectOutput>> {
        let get_start = Instant::now();
        let get_bucket = req.input.bucket.clone();
        let get_key = req.input.key.clone();
        self.enforce_rate_limit(&req, &get_bucket)?;
        self.enforce_policy(&req, "s3:GetObject", &get_bucket, Some(&get_key))?;
        // Detect a Range request up front and strip it from the input so the
        // backend serves the full compressed body; the range is re-applied
        // after decompression (or satisfied by the sidecar fast path below).
        let range_request = req.input.range.take();
        // v0.5 #27: pull SSE-C material from the input headers before
        // the request is moved into the backend. A header parse error
        // fails fast (no body fetch). The material is consumed below
        // when decrypting an S4E3-framed body; the SSE-C headers on
        // `req.input` are cleared so the backend doesn't see them.
        let sse_c_alg = req.input.sse_customer_algorithm.take();
        let sse_c_key = req.input.sse_customer_key.take();
        let sse_c_md5 = req.input.sse_customer_key_md5.take();
        let get_sse_c_material = extract_sse_c_material(&sse_c_alg, &sse_c_key, &sse_c_md5)?;

        // v0.5 #34: route the GET through the VersioningManager when
        // attached AND the bucket is in a versioning-aware state.
        // Resolves which version to fetch (explicit `?versionId=` query
        // param vs. chain latest), translates a delete-marker into 404
        // NoSuchKey, and rewrites the backend storage key to the shadow
        // path (`<key>.__s4ver__/<vid>`) for non-null Enabled-bucket
        // versions. `resolved_version_id` is stamped onto the response
        // so clients see a coherent `x-amz-version-id` header.
        //
        // When the bucket is Unversioned (or no manager attached), the
        // chain-resolution step is skipped and the request flows
        // through the existing single-key path unchanged.
        let resolved_version_id: Option<String> = match self.versioning.as_ref() {
            Some(mgr)
                if mgr.state(&get_bucket) != crate::versioning::VersioningState::Unversioned =>
            {
                let req_vid = req.input.version_id.take();
                let entry = match req_vid.as_deref() {
                    Some(vid) => {
                        mgr.lookup_version(&get_bucket, &get_key, vid)
                            .ok_or_else(|| {
                                S3Error::with_message(
                                    S3ErrorCode::NoSuchVersion,
                                    format!("no such version: {vid}"),
                                )
                            })?
                    }
                    None => mgr.lookup_latest(&get_bucket, &get_key).ok_or_else(|| {
                        S3Error::with_message(
                            S3ErrorCode::NoSuchKey,
                            format!("no such key: {get_key}"),
                        )
                    })?,
                };
                if entry.is_delete_marker {
                    // S3 spec: GET without versionId on a
                    // delete-marker latest → 404 NoSuchKey + the
                    // response carries `x-amz-delete-marker: true`.
                    // GET with explicit versionId pointing at a delete
                    // marker → 405 MethodNotAllowed; we surface
                    // NoSuchKey here for both since s3s collapses them
                    // into the same not-found error path.
                    return Err(S3Error::with_message(
                        S3ErrorCode::NoSuchKey,
                        format!("delete marker is the current version of {get_key}"),
                    ));
                }
                if entry.version_id != crate::versioning::NULL_VERSION_ID {
                    req.input.key = versioned_shadow_key(&get_key, &entry.version_id);
                }
                Some(entry.version_id)
            }
            _ => None,
        };

        // ====== Partial-fetch fast path for Range GETs (via sidecar index) ======
        // When the sidecar `<key>.s4index` exists and the object is
        // multipart-framed, only the frames covering the requested range are
        // Range-GET from the backend, saving bandwidth.
        //
        // v0.8.4 #73 H-2: BEFORE trusting the sidecar's frame offsets,
        // verify the source object hasn't been overwritten / mutated since
        // the sidecar was stamped. The sidecar carries the backend ETag
        // captured at PUT time (`source_etag`); a HEAD against the current
        // backend object tells us the live ETag. If they disagree we treat
        // the sidecar as stale and fall through to the full-GET path —
        // returning the wrong frames for a Range request would surface as
        // a CRC mismatch deeper in the stack but would also potentially
        // disclose unrelated frames if a hostile operator wrote the
        // sidecar themselves. Fail-open to "full read" is the safe default.
        //
        // Legacy v1 sidecars (no `source_etag` populated) keep the old
        // best-effort behaviour so existing on-disk indexes don't suddenly
        // start missing the partial-fetch path.
        if let Some(ref r) = range_request
            && let Some(index) = self.read_sidecar(&req.input.bucket, &req.input.key).await
            && self
                .sidecar_version_binding_ok(&req.input.bucket, &req.input.key, &index)
                .await
        {
            let total = index.total_original_size();
            let (start, end_exclusive) = match resolve_range(r, total) {
                Ok(v) => v,
                Err(e) => {
                    return Err(S3Error::with_message(S3ErrorCode::InvalidRange, e));
                }
            };
            if let Some(plan) = index.lookup_range(start, end_exclusive) {
                return self
                    .partial_range_get(&req, plan, start, end_exclusive, total, get_start)
                    .await;
            }
        }
        let mut resp = self.backend.get_object(req).await?;
        // v0.5 #34: stamp the resolved version-id so the client sees a
        // coherent `x-amz-version-id` header (only for chains owned by
        // the manager — Unversioned buckets / no-manager paths never
        // set this).
        if let Some(ref vid) = resolved_version_id {
            resp.output.version_id = Some(vid.clone());
        }
        let is_multipart = is_multipart_object(&resp.output.metadata);
        let is_framed_v2 = is_framed_v2_object(&resp.output.metadata);
        // v0.2 #4: framed-v2 single-PUT objects need multi-frame parsing,
        // so they are routed through the same path as multipart objects.
        let needs_frame_parse = is_multipart || is_framed_v2;
        let manifest_opt = extract_manifest(&resp.output.metadata);

        if !needs_frame_parse && manifest_opt.is_none() {
            // Objects not written by S4 pass through untouched (e.g.
            // pre-existing objects in a raw bucket).
            debug!("S4 get_object: object lacks s4-codec metadata, returning as-is");
            return Ok(resp);
        }

        if let Some(blob) = resp.output.body.take() {
            // v0.4 #21 / v0.5 #27: if the object was stored under SSE
            // (metadata flag `s4-encrypted: aes-256-gcm`), decrypt
            // before any frame parse / streaming decompress. Encrypted
            // bodies are opaque to the codec; this also forces the
            // buffered path because AES-GCM needs the full body for tag
            // verify. SSE-C uses the per-request customer key, SSE-S4
            // falls back to the configured keyring.
            let blob = if is_sse_encrypted(&resp.output.metadata) {
                let body = collect_blob(blob, self.max_body_bytes)
                    .await
                    .map_err(internal("collect SSE-encrypted body"))?;
                // v0.5 #28: peek the frame magic to route the right
                // decrypt path. S4E4 means SSE-KMS — unwrap the DEK
                // through the KMS backend (async). S4E1/E2/E3 take
                // the sync path (keyring or customer key).
                //
                // v0.8 #52 (S4E5) / v0.8.1 #57 (S4E6): the chunked
                // SSE-S4 frames take the *streaming* path — we hand
                // the response body a per-chunk verify-and-emit
                // Stream so the client sees chunk 0 plaintext after
                // one chunk-worth of AES-GCM verify (vs. waiting
                // for the whole body's tag), and the gateway no
                // longer needs to materialize the full plaintext
                // in memory before responding. SSE-C is out of
                // scope for the chunked path (chunked S4E3 is a
                // follow-up), so this branch requires the SSE-S4
                // keyring to be wired and `get_sse_c_material` to
                // be absent — otherwise we surface a clear
                // misconfiguration error instead of silently
                // falling through to the buffered chunked path.
                if matches!(crate::sse::peek_magic(&body), Some("S4E5") | Some("S4E6"))
                    && get_sse_c_material.is_none()
                {
                    let keyring_arc = self.sse_keyring.clone().ok_or_else(|| {
                        S3Error::with_message(
                            S3ErrorCode::InvalidRequest,
                            "object is SSE-S4 encrypted (S4E5/S4E6) but no --sse-s4-key is configured on this gateway",
                        )
                    })?;
                    let body_len = body.len() as u64;
                    let stream = crate::sse::decrypt_chunked_stream(body, keyring_arc.as_ref());
                    // Stream is `'static` (the keyring borrow is
                    // consumed up front; the cipher lives inside
                    // the stream state — see decrypt_chunked_stream
                    // doc), so we can move it straight into a
                    // StreamingBlob without lifetime gymnastics.
                    use futures::StreamExt;
                    let mapped = stream.map(|r| {
                        r.map_err(|e| std::io::Error::other(format!("SSE-S4 chunked decrypt: {e}")))
                    });
                    use s3s::dto::StreamingBlob;
                    resp.output.body = Some(StreamingBlob::wrap(mapped));
                    // Plaintext content_length is unknown until all
                    // chunks have been verified; null it out so the
                    // ByteStream wrapper reports `unknown` to the
                    // HTTP layer (which then emits chunked transfer-
                    // encoding) rather than lying about the size.
                    resp.output.content_length = None;
                    // The backend's checksums + ETag describe the
                    // encrypted body (S4E5/S4E6 wire format), not
                    // the plaintext we're about to stream — clear them
                    // so the AWS SDK doesn't fail the GET with a
                    // ChecksumMismatch on a successful round-trip.
                    // Mirrors the streaming-zstd path at L1180-1185.
                    resp.output.checksum_crc32 = None;
                    resp.output.checksum_crc32c = None;
                    resp.output.checksum_crc64nvme = None;
                    resp.output.checksum_sha1 = None;
                    resp.output.checksum_sha256 = None;
                    resp.output.e_tag = None;
                    let elapsed = get_start.elapsed();
                    // NOTE(review): bytes in/out are both the *encrypted*
                    // body length here — plaintext size isn't known yet.
                    crate::metrics::record_get(
                        "sse-s4-chunked",
                        body_len,
                        body_len,
                        elapsed.as_secs_f64(),
                        true,
                    );
                    return Ok(resp);
                }
                let plain = match crate::sse::peek_magic(&body) {
                    Some("S4E4") => {
                        let kms = self.kms.as_ref().ok_or_else(|| {
                            S3Error::with_message(
                                S3ErrorCode::InvalidRequest,
                                "object is SSE-KMS encrypted but no --kms-local-dir / --kms-aws-region is configured on this gateway",
                            )
                        })?;
                        let kms_ref: &dyn crate::kms::KmsBackend = kms.as_ref();
                        crate::sse::decrypt_with_kms(&body, kms_ref)
                            .await
                            .map_err(|e| match e {
                                crate::sse::SseError::KmsBackend(k) => kms_error_to_s3(k),
                                other => S3Error::with_message(
                                    S3ErrorCode::InternalError,
                                    format!("SSE-KMS decrypt failed: {other}"),
                                ),
                            })?
                    }
                    _ => {
                        if let Some(ref m) = get_sse_c_material {
                            crate::sse::decrypt(
                                &body,
                                crate::sse::SseSource::CustomerKey {
                                    key: &m.key,
                                    key_md5: &m.key_md5,
                                },
                            )
                            .map_err(sse_c_error_to_s3)?
                        } else {
                            let keyring = self.sse_keyring.as_ref().ok_or_else(|| {
                                S3Error::with_message(
                                    S3ErrorCode::InvalidRequest,
                                    "object is SSE-S4 encrypted but no --sse-s4-key is configured on this gateway",
                                )
                            })?;
                            crate::sse::decrypt(&body, keyring).map_err(|e| {
                                S3Error::with_message(
                                    S3ErrorCode::InternalError,
                                    format!("SSE-S4 decrypt failed: {e}"),
                                )
                            })?
                        }
                    }
                };
                // v0.5 #28: parse out the on-disk wrapped DEK's key id
                // so the GET response can echo `x-amz-server-side-encryption-aws-kms-key-id`.
                if matches!(crate::sse::peek_magic(&body), Some("S4E4"))
                    && let Ok(hdr) = crate::sse::parse_s4e4_header(&body)
                {
                    resp.output.server_side_encryption = Some(ServerSideEncryption::from_static(
                        ServerSideEncryption::AWS_KMS,
                    ));
                    resp.output.ssekms_key_id = Some(hdr.key_id.to_string());
                }
                bytes_to_blob(plain)
            } else if let Some(ref m) = get_sse_c_material {
                // Client sent SSE-C headers for an unencrypted object —
                // mirror AWS S3's 400 InvalidRequest. (The binding is
                // intentionally unused; the error carries no key material.)
                let _ = m;
                return Err(sse_c_error_to_s3(
                    crate::sse::SseError::CustomerKeyUnexpected,
                ));
            } else {
                blob
            };
            // v0.5 #27: SSE-C echo on success — algorithm + key MD5
            // tell the client that the supplied key was the one used.
            if let Some(ref m) = get_sse_c_material {
                resp.output.sse_customer_algorithm = Some(crate::sse::SSE_C_ALGORITHM.into());
                resp.output.sse_customer_key_md5 =
                    Some(base64::engine::general_purpose::STANDARD.encode(m.key_md5));
            }
            // ====== Streaming fast path (CpuZstd, non-multipart, codec supports it) ======
            // Collecting a large object (e.g. 5 GB) into memory would OOM,
            // so when the codec is streaming-aware the body is decompressed
            // chunk-by-chunk and forwarded to the client immediately.
            //
            // Range requests cannot stream (slicing needs the total byte
            // count) → they fall through to the buffered path.
            if range_request.is_none()
                && !needs_frame_parse
                && let Some(ref m) = manifest_opt
                && supports_streaming_decompress(m.codec)
                && m.codec == CodecKind::CpuZstd
            {
                // v0.8.4 #73 H-1: wrap the decompressor output in a
                // rolling-CRC32C verifier so a tampered ciphertext (or a
                // backend-side corruption that the zstd decoder happens
                // to "successfully" decode into wrong bytes) surfaces as
                // a streaming error tail at EOF instead of silently
                // delivering corrupt plaintext to the client. The wrap
                // is a pure pass-through during the body — no extra
                // buffering, TTFB unaffected — and the integrity
                // decision lands at the last chunk.
                let decompressed_blob = cpu_zstd_decompress_stream(blob);
                let verified_reader = Crc32cVerifyingReader::new(
                    blob_to_async_read(decompressed_blob),
                    m.crc32c,
                    m.original_size,
                );
                let verified_blob = async_read_to_blob(verified_reader);
                resp.output.content_length = Some(m.original_size as i64);
                // Backend checksums/ETag describe the compressed bytes —
                // clear them so SDK-side checksum validation doesn't fail.
                resp.output.checksum_crc32 = None;
                resp.output.checksum_crc32c = None;
                resp.output.checksum_crc64nvme = None;
                resp.output.checksum_sha1 = None;
                resp.output.checksum_sha256 = None;
                resp.output.e_tag = None;
                resp.output.body = Some(verified_blob);
                let elapsed = get_start.elapsed();
                crate::metrics::record_get(
                    m.codec.as_str(),
                    m.compressed_size,
                    m.original_size,
                    elapsed.as_secs_f64(),
                    true,
                );
                info!(
                    op = "get_object",
                    bucket = %get_bucket,
                    key = %get_key,
                    codec = m.codec.as_str(),
                    bytes_in = m.compressed_size,
                    bytes_out = m.original_size,
                    path = "streaming",
                    setup_latency_ms = elapsed.as_millis() as u64,
                    "S4 get started (streaming)"
                );
                return Ok(resp);
            }
            // Passthrough codec: forward the body as-is (streaming only
            // when there is no Range request).
            if range_request.is_none()
                && !needs_frame_parse
                && let Some(ref m) = manifest_opt
                && m.codec == CodecKind::Passthrough
            {
                resp.output.content_length = Some(m.original_size as i64);
                resp.output.checksum_crc32 = None;
                resp.output.checksum_crc32c = None;
                resp.output.checksum_crc64nvme = None;
                resp.output.checksum_sha1 = None;
                resp.output.checksum_sha256 = None;
                resp.output.e_tag = None;
                resp.output.body = Some(blob);
                debug!("S4 get_object: passthrough streaming");
                return Ok(resp);
            }

            // ====== Buffered slow path (multipart frame parser, GPU codecs) ======
            let bytes = collect_blob(blob, self.max_body_bytes)
                .await
                .map_err(internal("collect get body"))?;

            let decompressed = if needs_frame_parse {
                // Multipart objects and framed-v2 single-PUT objects share
                // the same S4F2 frame sequence, so both are handled by
                // decompress_multipart.
                self.decompress_multipart(bytes).await?
            } else {
                let manifest = manifest_opt.as_ref().expect("non-multipart guarded above");
                self.registry
                    .decompress(bytes, manifest)
                    .await
                    .map_err(internal("registry decompress"))?
            };

            // Slice if a Range was requested; otherwise return the full body.
            let total_size = decompressed.len() as u64;
            let (final_bytes, status_override) = if let Some(r) = range_request.as_ref() {
                let (start, end) = resolve_range(r, total_size)
                    .map_err(|e| S3Error::with_message(S3ErrorCode::InvalidRange, e))?;
                let sliced = decompressed.slice(start as usize..end as usize);
                // Content-Range uses an *inclusive* end, hence the -1.
                resp.output.content_range = Some(format!(
                    "bytes {start}-{}/{total_size}",
                    end.saturating_sub(1)
                ));
                (sliced, Some(http::StatusCode::PARTIAL_CONTENT))
            } else {
                (decompressed, None)
            };
            // Return the true post-decompression size (S3 clients trust
            // content_length; leaving the compressed size would make
            // downstream readers truncate the body mid-stream).
            resp.output.content_length = Some(final_bytes.len() as i64);
            // Returning checksums of the compressed bytes would trigger a
            // StreamingError (ChecksumMismatch) in the AWS SDK. The ETag
            // is likewise the backend's "MD5/checksum of compressed bytes"
            // and semantically wrong here — clear both; S4's own crc32c
            // (in the manifest / in each frame) guarantees integrity.
            resp.output.checksum_crc32 = None;
            resp.output.checksum_crc32c = None;
            resp.output.checksum_crc64nvme = None;
            resp.output.checksum_sha1 = None;
            resp.output.checksum_sha256 = None;
            resp.output.e_tag = None;
            let returned_size = final_bytes.len() as u64;
            let codec_label = manifest_opt
                .as_ref()
                .map(|m| m.codec.as_str())
                .unwrap_or("multipart");
            resp.output.body = Some(bytes_to_blob(final_bytes));
            if let Some(status) = status_override {
                resp.status = Some(status);
            }
            let elapsed = get_start.elapsed();
            // NOTE(review): bytes_in is recorded as 0 on this path —
            // compressed size isn't tracked for the buffered route; confirm
            // whether the metric consumer expects that.
            crate::metrics::record_get(codec_label, 0, returned_size, elapsed.as_secs_f64(), true);
            info!(
                op = "get_object",
                bucket = %get_bucket,
                key = %get_key,
                codec = codec_label,
                bytes_out = returned_size,
                total_object_size = total_size,
                range = range_request.is_some(),
                path = "buffered",
                latency_ms = elapsed.as_millis() as u64,
                "S4 get completed (buffered)"
            );
        }
        // v0.6 #40: echo the recorded `x-amz-replication-status` so
        // consumers can poll progress (PENDING / COMPLETED / FAILED).
        if let Some(mgr) = self.replication.as_ref()
            && let Some(status) = mgr.lookup_status(&get_bucket, &get_key)
        {
            resp.output.replication_status = Some(s3s::dto::ReplicationStatus::from(
                status.as_aws_str().to_owned(),
            ));
        }
        Ok(resp)
    }
3094
3095    // === passthrough delegations ===
3096    async fn head_bucket(
3097        &self,
3098        req: S3Request<HeadBucketInput>,
3099    ) -> S3Result<S3Response<HeadBucketOutput>> {
3100        self.backend.head_bucket(req).await
3101    }
3102    async fn list_buckets(
3103        &self,
3104        req: S3Request<ListBucketsInput>,
3105    ) -> S3Result<S3Response<ListBucketsOutput>> {
3106        self.backend.list_buckets(req).await
3107    }
3108    async fn create_bucket(
3109        &self,
3110        req: S3Request<CreateBucketInput>,
3111    ) -> S3Result<S3Response<CreateBucketOutput>> {
3112        self.backend.create_bucket(req).await
3113    }
3114    async fn delete_bucket(
3115        &self,
3116        req: S3Request<DeleteBucketInput>,
3117    ) -> S3Result<S3Response<DeleteBucketOutput>> {
3118        self.backend.delete_bucket(req).await
3119    }
3120    async fn head_object(
3121        &self,
3122        req: S3Request<HeadObjectInput>,
3123    ) -> S3Result<S3Response<HeadObjectOutput>> {
3124        // v0.6 #40: capture bucket/key before req is consumed so the
3125        // replication-status echo can look the entry up.
3126        let head_bucket = req.input.bucket.clone();
3127        let head_key = req.input.key.clone();
3128        let mut resp = self.backend.head_object(req).await?;
3129        if let Some(manifest) = extract_manifest(&resp.output.metadata) {
3130            // 客側には decompress 後の意味のある content_length / checksum を返す。
3131            // backend が返す圧縮済 bytes の checksum / e_tag は意味が違うため除去
3132            // (S4 は manifest 内の crc32c で integrity を担保する)。
3133            resp.output.content_length = Some(manifest.original_size as i64);
3134            resp.output.checksum_crc32 = None;
3135            resp.output.checksum_crc32c = None;
3136            resp.output.checksum_crc64nvme = None;
3137            resp.output.checksum_sha1 = None;
3138            resp.output.checksum_sha256 = None;
3139            resp.output.e_tag = None;
3140        }
3141        // v0.6 #40: echo `x-amz-replication-status` (PENDING / COMPLETED
3142        // / FAILED) so consumers can poll progress without a GET.
3143        if let Some(mgr) = self.replication.as_ref()
3144            && let Some(status) = mgr.lookup_status(&head_bucket, &head_key)
3145        {
3146            resp.output.replication_status = Some(s3s::dto::ReplicationStatus::from(
3147                status.as_aws_str().to_owned(),
3148            ));
3149        }
3150        // v0.7 #48 BUG-4 fix: HEAD must echo SSE indicators so SDKs
3151        // and pipelines see the same posture they got on PUT. The PUT
3152        // path stamps `s4-sse-type` metadata for exactly this — HEAD
3153        // doesn't fetch the body, so it can't peek frame magic.
3154        if let Some(meta) = resp.output.metadata.as_ref()
3155            && let Some(sse_type) = meta.get("s4-sse-type")
3156        {
3157            {
3158                match sse_type.as_str() {
3159                    "aws:kms" => {
3160                        resp.output.server_side_encryption = Some(
3161                            ServerSideEncryption::from_static(ServerSideEncryption::AWS_KMS),
3162                        );
3163                        if let Some(key_id) = meta.get("s4-sse-kms-key-id") {
3164                            resp.output.ssekms_key_id = Some(key_id.clone());
3165                        }
3166                    }
3167                    _ => {
3168                        resp.output.server_side_encryption = Some(
3169                            ServerSideEncryption::from_static(ServerSideEncryption::AES256),
3170                        );
3171                        if let Some(md5) = meta.get("s4-sse-c-key-md5") {
3172                            resp.output.sse_customer_algorithm =
3173                                Some(crate::sse::SSE_C_ALGORITHM.into());
3174                            resp.output.sse_customer_key_md5 = Some(md5.clone());
3175                        }
3176                    }
3177                }
3178            }
3179        }
3180        Ok(resp)
3181    }
    /// DELETE for a single object.
    ///
    /// Enforcement / routing order (deliberate — do not reorder):
    /// 1. rate limit + `s3:DeleteObject` policy
    /// 2. MFA Delete (v0.6 #42) — missing/invalid token denies the request
    /// 3. WORM / Object Lock (v0.5 #30) — Compliance is never bypassable
    /// 4. versioning router (v0.5 #34) — marker push or physical delete,
    ///    early-returns without touching lock / tag / sidecar state
    /// 5. legacy passthrough + best-effort sidecar, lock and tag cleanup
    async fn delete_object(
        &self,
        mut req: S3Request<DeleteObjectInput>,
    ) -> S3Result<S3Response<DeleteObjectOutput>> {
        let bucket = req.input.bucket.clone();
        let key = req.input.key.clone();
        self.enforce_rate_limit(&req, &bucket)?;
        self.enforce_policy(&req, "s3:DeleteObject", &bucket, Some(&key))?;
        // v0.6 #42: MFA Delete enforcement. When the bucket has
        // MFA-Delete = Enabled, every DELETE / DELETE-version /
        // delete-marker form needs `x-amz-mfa: <serial> <code>` (RFC 6238
        // 6-digit TOTP). Runs *before* the WORM / versioning routers so
        // a missing token is denied for free regardless of which delete
        // path the request would otherwise take.
        if let Some(mgr) = self.mfa_delete.as_ref()
            && mgr.is_enabled(&bucket)
        {
            let header = req.input.mfa.as_deref();
            if let Err(e) = crate::mfa::check_mfa(&bucket, header, mgr, current_unix_secs()) {
                crate::metrics::record_mfa_delete_denial(&bucket);
                return Err(mfa_error_to_s3(e));
            }
        }
        // v0.5 #30: refuse the delete while a WORM lock is in effect.
        // Compliance can never be bypassed; Governance can be overridden
        // via `x-amz-bypass-governance-retention: true`; legal hold
        // never. The check happens before the versioning router so a
        // locked object can't be soft-deleted (delete-marker push) on an
        // Enabled bucket either — S3 spec says lock applies to all
        // delete forms.
        if let Some(mgr) = self.object_lock.as_ref()
            && let Some(state) = mgr.get(&bucket, &key)
        {
            let bypass = req.input.bypass_governance_retention.unwrap_or(false);
            let now = chrono::Utc::now();
            if !state.can_delete(now, bypass) {
                crate::metrics::record_policy_denial("s3:DeleteObject", &bucket);
                return Err(S3Error::with_message(
                    S3ErrorCode::AccessDenied,
                    "Access Denied because object protected by object lock",
                ));
            }
        }
        // v0.5 #34: route DELETE through the VersioningManager when the
        // bucket is in a versioning-aware state.
        //
        // - Enabled bucket, no version_id → push a delete marker into
        //   the chain. NO backend object is touched (older versions
        //   stay reachable via specific-version GET).
        // - Enabled / Suspended bucket, with version_id → physical
        //   delete. Backend bytes at the shadow key (or `<key>` for
        //   `null`) are removed; chain entry is dropped. If the deleted
        //   entry was a delete marker, no backend bytes exist for it
        //   (record-only).
        // - Suspended bucket, no version_id → push a "null" delete
        //   marker (S3 spec); backend bytes at `<key>` are physically
        //   removed (same as legacy).
        // - Unversioned bucket → fall through to legacy passthrough.
        if let Some(mgr) = self.versioning.as_ref() {
            let state = mgr.state(&bucket);
            if state != crate::versioning::VersioningState::Unversioned {
                let req_vid = req.input.version_id.take();
                if let Some(vid) = req_vid {
                    // Specific-version DELETE: touch backend bytes only
                    // when the entry was a real version (not a delete
                    // marker, which has no backend bytes).
                    let outcome = mgr.record_delete_specific(&bucket, &key, &vid);
                    let backend_target = if vid == crate::versioning::NULL_VERSION_ID {
                        key.clone()
                    } else {
                        versioned_shadow_key(&key, &vid)
                    };
                    let was_real_version = outcome
                        .as_ref()
                        .map(|o| !o.is_delete_marker)
                        .unwrap_or(false);
                    if was_real_version {
                        // Best-effort backend cleanup; missing bytes
                        // are not an error (e.g. shadow key already
                        // GC'd).
                        let backend_input = DeleteObjectInput {
                            bucket: bucket.clone(),
                            key: backend_target,
                            ..Default::default()
                        };
                        let backend_req = S3Request {
                            input: backend_input,
                            method: http::Method::DELETE,
                            uri: req.uri.clone(),
                            headers: req.headers.clone(),
                            extensions: http::Extensions::new(),
                            credentials: req.credentials.clone(),
                            region: req.region.clone(),
                            service: req.service.clone(),
                            trailing_headers: None,
                        };
                        let _ = self.backend.delete_object(backend_req).await;
                    }
                    let mut output = DeleteObjectOutput {
                        version_id: Some(vid.clone()),
                        ..Default::default()
                    };
                    if let Some(o) = outcome.as_ref()
                        && o.is_delete_marker
                    {
                        output.delete_marker = Some(true);
                    }
                    // v0.6 #35: specific-version DELETE always counts as
                    // a hard `ObjectRemoved:Delete` event (the chain
                    // entry, marker or not, is gone after this call).
                    self.fire_delete_notification(
                        &bucket,
                        &key,
                        crate::notifications::EventType::ObjectRemovedDelete,
                        Some(vid.clone()),
                    );
                    return Ok(S3Response::new(output));
                }
                // No version_id: record a delete marker (state-aware).
                let outcome = mgr.record_delete(&bucket, &key);
                if state == crate::versioning::VersioningState::Suspended {
                    // Suspended buckets also evict the prior `<key>`
                    // bytes (the previous null version is gone too).
                    let backend_input = DeleteObjectInput {
                        bucket: bucket.clone(),
                        key: key.clone(),
                        ..Default::default()
                    };
                    let backend_req = S3Request {
                        input: backend_input,
                        method: http::Method::DELETE,
                        uri: req.uri.clone(),
                        headers: req.headers.clone(),
                        extensions: http::Extensions::new(),
                        credentials: req.credentials.clone(),
                        region: req.region.clone(),
                        service: req.service.clone(),
                        trailing_headers: None,
                    };
                    let _ = self.backend.delete_object(backend_req).await;
                }
                let output = DeleteObjectOutput {
                    delete_marker: Some(true),
                    version_id: outcome.version_id.clone(),
                    ..Default::default()
                };
                // v0.6 #35: versioned bucket DELETE without a version-id
                // creates a delete marker — the dedicated AWS event
                // taxonomy entry. Suspended-state buckets also push a
                // (null) marker, so the same event fires there.
                self.fire_delete_notification(
                    &bucket,
                    &key,
                    crate::notifications::EventType::ObjectRemovedDeleteMarker,
                    outcome.version_id,
                );
                return Ok(S3Response::new(output));
            }
        }
        // Legacy / Unversioned path: physical delete on the backend +
        // best-effort sidecar cleanup (mirrors v0.4 behaviour).
        let resp = self.backend.delete_object(req).await?;
        // v0.5 #30: drop any per-object lock state once the delete has
        // succeeded so the freed key can be re-armed by a future PUT
        // under the bucket default. Reaching here implies the lock had
        // already passed `can_delete` above, so this is purely cleanup.
        if let Some(mgr) = self.object_lock.as_ref() {
            mgr.clear(&bucket, &key);
        }
        // v0.6 #39: drop any object-level tag set on physical delete —
        // the freed key starts a fresh tag history if a future PUT
        // re-creates it. (Versioned-delete branches above return early
        // and do NOT touch tags, mirroring AWS where tag state is
        // attached to the logical key, not the version chain.)
        if let Some(mgr) = self.tagging.as_ref() {
            mgr.delete_object_tags(&bucket, &key);
        }
        let sidecar = sidecar_key(&key);
        // v0.7 #49: skip the sidecar DELETE if the key + sidecar suffix
        // can't be encoded into a request URI — the primary delete
        // already succeeded and a stale sidecar is harmless (Range GET
        // re-validates the underlying object on next read).
        if let Ok(uri) = safe_object_uri(&bucket, &sidecar) {
            let sidecar_input = DeleteObjectInput {
                bucket: bucket.clone(),
                key: sidecar,
                ..Default::default()
            };
            let sidecar_req = S3Request {
                input: sidecar_input,
                method: http::Method::DELETE,
                uri,
                headers: http::HeaderMap::new(),
                extensions: http::Extensions::new(),
                credentials: None,
                region: None,
                service: None,
                trailing_headers: None,
            };
            let _ = self.backend.delete_object(sidecar_req).await;
        }
        // v0.6 #35: legacy unversioned-bucket hard delete fires the
        // canonical `ObjectRemoved:Delete` event.
        self.fire_delete_notification(
            &bucket,
            &key,
            crate::notifications::EventType::ObjectRemovedDelete,
            None,
        );
        Ok(resp)
    }
3393    async fn delete_objects(
3394        &self,
3395        req: S3Request<DeleteObjectsInput>,
3396    ) -> S3Result<S3Response<DeleteObjectsOutput>> {
3397        // v0.6 #42: MFA Delete applies once to the whole batch (S3 spec:
3398        // when MFA-Delete is on the bucket, a missing / invalid token
3399        // fails the entire DeleteObjects request, not per-object).
3400        if let Some(mgr) = self.mfa_delete.as_ref()
3401            && mgr.is_enabled(&req.input.bucket)
3402        {
3403            let header = req.input.mfa.as_deref();
3404            if let Err(e) =
3405                crate::mfa::check_mfa(&req.input.bucket, header, mgr, current_unix_secs())
3406            {
3407                crate::metrics::record_mfa_delete_denial(&req.input.bucket);
3408                return Err(mfa_error_to_s3(e));
3409            }
3410        }
3411        self.backend.delete_objects(req).await
3412    }
    /// CopyObject — conceptually "GetObject src + PutObject dst", so
    /// both policies are enforced, and S4's own `s4-*` metadata is
    /// preserved across a `MetadataDirective: REPLACE` copy.
    async fn copy_object(
        &self,
        mut req: S3Request<CopyObjectInput>,
    ) -> S3Result<S3Response<CopyObjectOutput>> {
        // copy is conceptually "GetObject src + PutObject dst" — enforce both.
        let dst_bucket = req.input.bucket.clone();
        let dst_key = req.input.key.clone();
        self.enforce_policy(&req, "s3:PutObject", &dst_bucket, Some(&dst_key))?;
        if let CopySource::Bucket { bucket, key, .. } = &req.input.copy_source {
            self.enforce_policy(&req, "s3:GetObject", bucket, Some(key))?;
        }
        // S4-aware copy: when the source object carries s4-* metadata,
        // make sure it is preserved onto the destination.
        //
        // - MetadataDirective::COPY (default): the backend copies the
        //   source metadata verbatim, so S4 metadata flows through
        //   automatically. No intervention needed.
        // - MetadataDirective::REPLACE: client-supplied metadata
        //   overwrites the source's → if the s4-* metadata is lost, the
        //   destination can no longer be decompressed (silent
        //   corruption). S4 fetches the source metadata via HEAD and
        //   force-merges the s4-* fields into input.metadata.
        let needs_merge = req
            .input
            .metadata_directive
            .as_ref()
            .map(|d| d.as_str() == MetadataDirective::REPLACE)
            .unwrap_or(false);
        if needs_merge && let CopySource::Bucket { bucket, key, .. } = &req.input.copy_source {
            let head_input = HeadObjectInput {
                bucket: bucket.to_string(),
                key: key.to_string(),
                ..Default::default()
            };
            let head_req = S3Request {
                input: head_input,
                method: req.method.clone(),
                uri: req.uri.clone(),
                headers: req.headers.clone(),
                extensions: http::Extensions::new(),
                credentials: req.credentials.clone(),
                region: req.region.clone(),
                service: req.service.clone(),
                trailing_headers: None,
            };
            // Best-effort: a failed HEAD falls through to plain
            // passthrough copy (the backend will surface its own error
            // if the source is truly gone).
            if let Ok(head) = self.backend.head_object(head_req).await
                && let Some(src_meta) = head.output.metadata.as_ref()
            {
                let dest_meta = req.input.metadata.get_or_insert_with(Default::default);
                for key in [
                    META_CODEC,
                    META_ORIGINAL_SIZE,
                    META_COMPRESSED_SIZE,
                    META_CRC32C,
                    META_MULTIPART,
                    META_FRAMED,
                ] {
                    if let Some(v) = src_meta.get(key) {
                        // If the client supplied the same key themselves,
                        // don't preserve (= allow their override); only
                        // insert the source value when absent.
                        dest_meta
                            .entry(key.to_string())
                            .or_insert_with(|| v.clone());
                    }
                }
                debug!(
                    src_bucket = %bucket,
                    src_key = %key,
                    "S4 copy_object: preserved s4-* metadata across REPLACE directive"
                );
            }
        }
        self.backend.copy_object(req).await
    }
3485    async fn list_objects(
3486        &self,
3487        req: S3Request<ListObjectsInput>,
3488    ) -> S3Result<S3Response<ListObjectsOutput>> {
3489        self.enforce_rate_limit(&req, &req.input.bucket)?;
3490        self.enforce_policy(&req, "s3:ListBucket", &req.input.bucket, None)?;
3491        let mut resp = self.backend.list_objects(req).await?;
3492        // S4 内部 object (`*.s4index` sidecar、`.__s4ver__/` shadow versions
3493        // — v0.5 #34) を顧客から隠す。
3494        if let Some(contents) = resp.output.contents.as_mut() {
3495            contents.retain(|o| {
3496                o.key
3497                    .as_ref()
3498                    .map(|k| !k.ends_with(".s4index") && !is_versioning_shadow_key(k))
3499                    .unwrap_or(true)
3500            });
3501        }
3502        Ok(resp)
3503    }
3504    async fn list_objects_v2(
3505        &self,
3506        req: S3Request<ListObjectsV2Input>,
3507    ) -> S3Result<S3Response<ListObjectsV2Output>> {
3508        self.enforce_rate_limit(&req, &req.input.bucket)?;
3509        self.enforce_policy(&req, "s3:ListBucket", &req.input.bucket, None)?;
3510        let mut resp = self.backend.list_objects_v2(req).await?;
3511        if let Some(contents) = resp.output.contents.as_mut() {
3512            let before = contents.len();
3513            contents.retain(|o| {
3514                o.key
3515                    .as_ref()
3516                    .map(|k| !k.ends_with(".s4index") && !is_versioning_shadow_key(k))
3517                    .unwrap_or(true)
3518            });
3519            // key_count も補正 (S3 spec compliance)
3520            if let Some(kc) = resp.output.key_count.as_mut() {
3521                *kc -= (before - contents.len()) as i32;
3522            }
3523        }
3524        Ok(resp)
3525    }
    /// v0.4 #17: filter S4-internal sidecars from versioned listings.
    /// v0.5 #34: when a [`crate::versioning::VersioningManager`] is
    /// attached AND the bucket is in a versioning-aware state, build
    /// the `Versions` / `DeleteMarkers` arrays directly from the
    /// in-memory chain (paginated + ordered the S3 way: key asc,
    /// version newest-first inside each key). Otherwise fall back to
    /// passthrough + sidecar-filter (legacy v0.4 behaviour).
    async fn list_object_versions(
        &self,
        req: S3Request<ListObjectVersionsInput>,
    ) -> S3Result<S3Response<ListObjectVersionsOutput>> {
        self.enforce_rate_limit(&req, &req.input.bucket)?;
        self.enforce_policy(&req, "s3:ListBucket", &req.input.bucket, None)?;
        // v0.5 #34: VersioningManager-owned path.
        if let Some(mgr) = self.versioning.as_ref()
            && mgr.state(&req.input.bucket) != crate::versioning::VersioningState::Unversioned
        {
            // NOTE(review): a negative max_keys would wrap through `as
            // usize` — presumably s3s validates the header upstream;
            // confirm.
            let max_keys = req.input.max_keys.unwrap_or(1000) as usize;
            let page = mgr.list_versions(
                &req.input.bucket,
                req.input.prefix.as_deref(),
                req.input.key_marker.as_deref(),
                req.input.version_id_marker.as_deref(),
                max_keys,
            );
            // Map the manager's chain entries into the s3s DTOs.
            let versions: Vec<ObjectVersion> = page
                .versions
                .into_iter()
                .map(|e| ObjectVersion {
                    key: Some(e.key),
                    version_id: Some(e.version_id),
                    is_latest: Some(e.is_latest),
                    e_tag: Some(ETag::Strong(e.etag)),
                    size: Some(e.size as i64),
                    last_modified: Some(std::time::SystemTime::from(e.last_modified).into()),
                    ..Default::default()
                })
                .collect();
            let delete_markers: Vec<DeleteMarkerEntry> = page
                .delete_markers
                .into_iter()
                .map(|e| DeleteMarkerEntry {
                    key: Some(e.key),
                    version_id: Some(e.version_id),
                    is_latest: Some(e.is_latest),
                    last_modified: Some(std::time::SystemTime::from(e.last_modified).into()),
                    ..Default::default()
                })
                .collect();
            // Empty arrays are omitted (None) rather than serialized as
            // empty lists, matching the passthrough backend's shape.
            let output = ListObjectVersionsOutput {
                name: Some(req.input.bucket.clone()),
                prefix: req.input.prefix.clone(),
                key_marker: req.input.key_marker.clone(),
                version_id_marker: req.input.version_id_marker.clone(),
                max_keys: req.input.max_keys,
                versions: if versions.is_empty() {
                    None
                } else {
                    Some(versions)
                },
                delete_markers: if delete_markers.is_empty() {
                    None
                } else {
                    Some(delete_markers)
                },
                is_truncated: Some(page.is_truncated),
                next_key_marker: page.next_key_marker,
                next_version_id_marker: page.next_version_id_marker,
                ..Default::default()
            };
            return Ok(S3Response::new(output));
        }
        // Legacy passthrough path (v0.4 #17 sidecar filter retained).
        let mut resp = self.backend.list_object_versions(req).await?;
        if let Some(versions) = resp.output.versions.as_mut() {
            versions.retain(|v| {
                v.key
                    .as_ref()
                    .map(|k| !k.ends_with(".s4index") && !is_versioning_shadow_key(k))
                    .unwrap_or(true)
            });
        }
        if let Some(markers) = resp.output.delete_markers.as_mut() {
            markers.retain(|m| {
                m.key
                    .as_ref()
                    .map(|k| !k.ends_with(".s4index") && !is_versioning_shadow_key(k))
                    .unwrap_or(true)
            });
        }
        Ok(resp)
    }
3618
    /// CreateMultipartUpload.
    ///
    /// Marks the object for per-part compression, strips SSE / Tagging /
    /// Object-Lock request fields off the backend request (S4 owns those
    /// contracts), stashes the per-upload recipe in `multipart_state`
    /// keyed by the backend-issued upload_id, and echoes the SSE fields
    /// back in the response the way `put_object` does.
    async fn create_multipart_upload(
        &self,
        mut req: S3Request<CreateMultipartUploadInput>,
    ) -> S3Result<S3Response<CreateMultipartUploadOutput>> {
        // Multipart objects are written per-part-compressed in frame
        // format. Flag the object metadata so GET triggers frame
        // parsing. The codec is the dispatcher's default kind
        // (per-part codec selection is Phase 2).
        let codec_kind = self.registry.default_kind();
        let meta = req.input.metadata.get_or_insert_with(Default::default);
        meta.insert(META_MULTIPART.into(), "true".into());
        meta.insert(META_CODEC.into(), codec_kind.as_str().into());
        // v0.8 #54 BUG-10 fix: take() the SSE request fields off
        // `req.input` so they are NOT forwarded to the backend on
        // CreateMultipartUpload. Same root cause as v0.7 #48 BUG-2/3 on
        // single-PUT — MinIO rejects SSE-C with "HTTPS required" and
        // SSE-KMS with "KMS not configured" when the headers reach it.
        // S4 owns the encrypt-then-store contract; we capture the
        // recipe in `multipart_state` here and apply it on Complete.
        let sse_c_alg = req.input.sse_customer_algorithm.take();
        let sse_c_key = req.input.sse_customer_key.take();
        let sse_c_md5 = req.input.sse_customer_key_md5.take();
        let sse_header = req.input.server_side_encryption.take();
        let sse_kms_key = req.input.ssekms_key_id.take();
        // Strip the encryption-context too — leaving it would make
        // MinIO try to validate it against a non-existent KMS key.
        let _ = req.input.ssekms_encryption_context.take();
        let sse_c_material = extract_sse_c_material(&sse_c_alg, &sse_c_key, &sse_c_md5)?;
        let kms_key_id = extract_kms_key_id(
            &sse_header,
            &sse_kms_key,
            self.kms_default_key_id.as_deref(),
        );
        // SSE-C / SSE-KMS exclusivity (mirrors put_object L1870).
        if sse_c_material.is_some() && kms_key_id.is_some() {
            return Err(S3Error::with_message(
                S3ErrorCode::InvalidArgument,
                "SSE-C and SSE-KMS cannot be used together on the same multipart upload",
            ));
        }
        let sse_mode = if let Some(ref m) = sse_c_material {
            // v0.8.2 #62 (H-6 audit fix): wrap the customer-supplied
            // 32-byte key in `Zeroizing` so abandoned uploads (or
            // normal Complete/Abort) wipe the key bytes on drop. The
            // `key_md5` is the public fingerprint and stays as a
            // bare `[u8; 16]`.
            crate::multipart_state::MultipartSseMode::SseC {
                key: zeroize::Zeroizing::new(m.key),
                key_md5: m.key_md5,
            }
        } else if let Some(ref kid) = kms_key_id {
            // KMS pre-flight: fail at Create rather than at Complete if
            // the gateway has no KMS backend wired (mirrors the
            // put_object L1879 check).
            if self.kms.is_none() {
                return Err(S3Error::with_message(
                    S3ErrorCode::InvalidRequest,
                    "SSE-KMS requested but no --kms-local-dir / --kms-aws-region is configured on this gateway",
                ));
            }
            crate::multipart_state::MultipartSseMode::SseKms {
                key_id: kid.clone(),
            }
        } else if self.sse_keyring.is_some() {
            // SSE-S4: server-driven transparent encryption. Activates
            // whenever the gateway has a keyring configured AND the
            // client didn't pick a different SSE mode.
            crate::multipart_state::MultipartSseMode::SseS4
        } else {
            crate::multipart_state::MultipartSseMode::None
        };
        // v0.8 #54 BUG-9 fix: parse the Tagging header on Create. The
        // single-PUT path does this on PutObject; the multipart path
        // captures it now and commits via TagManager on Complete.
        let request_tags: Option<crate::tagging::TagSet> = req
            .input
            .tagging
            .as_deref()
            .map(crate::tagging::parse_tagging_header)
            .transpose()
            .map_err(|e| S3Error::with_message(S3ErrorCode::InvalidArgument, e.to_string()))?;
        // Strip the `Tagging` field off the input so the backend
        // doesn't try to apply it (no-op on MinIO but keeps the wire
        // clean).
        let _ = req.input.tagging.take();
        // Object Lock recipe (BUG-7 — captured here, applied on Complete).
        let explicit_lock_mode: Option<crate::object_lock::LockMode> = req
            .input
            .object_lock_mode
            .as_ref()
            .and_then(|m| crate::object_lock::LockMode::from_aws_str(m.as_str()));
        let explicit_retain_until: Option<chrono::DateTime<chrono::Utc>> = req
            .input
            .object_lock_retain_until_date
            .as_ref()
            .and_then(timestamp_to_chrono_utc);
        let explicit_legal_hold_on: bool = req
            .input
            .object_lock_legal_hold_status
            .as_ref()
            .map(|s| s.as_str().eq_ignore_ascii_case("ON"))
            .unwrap_or(false);
        let bucket = req.input.bucket.clone();
        let key = req.input.key.clone();
        debug!(
            bucket = %bucket,
            key = %key,
            codec = codec_kind.as_str(),
            sse = ?sse_mode,
            "S4 create_multipart_upload: marking object for per-part compression"
        );
        let mut resp = self.backend.create_multipart_upload(req).await?;
        // Stash the per-upload context only after the backend handed
        // us an upload_id (failed Creates leave nothing in the store).
        if let Some(upload_id) = resp.output.upload_id.as_ref() {
            self.multipart_state.put(
                upload_id,
                crate::multipart_state::MultipartUploadContext {
                    bucket,
                    key,
                    sse: sse_mode.clone(),
                    tags: request_tags,
                    object_lock_mode: explicit_lock_mode,
                    object_lock_retain_until: explicit_retain_until,
                    object_lock_legal_hold: explicit_legal_hold_on,
                },
            );
        }
        // SSE-C / SSE-KMS response echo (mirrors put_object L2036-L2050).
        match &sse_mode {
            crate::multipart_state::MultipartSseMode::SseC { key_md5, .. } => {
                resp.output.sse_customer_algorithm = Some(crate::sse::SSE_C_ALGORITHM.into());
                resp.output.sse_customer_key_md5 =
                    Some(base64::engine::general_purpose::STANDARD.encode(key_md5));
            }
            crate::multipart_state::MultipartSseMode::SseKms { key_id } => {
                resp.output.server_side_encryption = Some(ServerSideEncryption::from_static(
                    ServerSideEncryption::AWS_KMS,
                ));
                resp.output.ssekms_key_id = Some(key_id.clone());
            }
            _ => {}
        }
        Ok(resp)
    }
3763
3764    async fn upload_part(
3765        &self,
3766        mut req: S3Request<UploadPartInput>,
3767    ) -> S3Result<S3Response<UploadPartOutput>> {
3768        // 各 part を圧縮して frame header 付きで forward。GET 時に
3769        // `decompress_multipart` が frame iter で順に解凍する。
3770        // **per-part codec dispatch**: dispatcher が body 先頭 sample から
3771        // codec を選ぶので、parquet 風の mixed-content multipart で part ごとに
3772        // 最適 codec を使える (整数列 part → Bitcomp、text 列 part → zstd 等)。
3773        //
3774        // v0.8 #54 BUG-5/BUG-10 fix: lookup the per-upload SSE
3775        // context captured by `create_multipart_upload` and (a) strip
3776        // any SSE-C request headers off `req.input` so the backend
3777        // doesn't see them — same root cause as v0.7 #48 BUG-2/3 on
3778        // single-PUT; MinIO refuses SSE-C parts over HTTP — and (b)
3779        // observe that an upload context exists for `upload_id`. The
3780        // actual encrypt happens once at `complete_multipart_upload`
3781        // time on the assembled body (the per-part-encrypt approach
3782        // would require a matching multi-segment decrypt path on GET;
3783        // encrypting the whole assembled body keeps the GET path's
3784        // `is_sse_encrypted` branch in get_object L2429 working
3785        // unchanged).
3786        let sse_ctx = self.multipart_state.get(req.input.upload_id.as_str());
3787        // v0.8.2 #62 (H-1 audit fix): SSE-C key consistency check.
3788        // The AWS S3 spec requires the same SSE-C key headers on
3789        // every UploadPart and rejects mismatches with 400. Prior to
3790        // #62 we silently stripped the headers (BUG-10 fix) without
3791        // validating them, allowing a client to send part 1 under
3792        // key-A and part 2 under key-B; both got stored, then
3793        // re-encrypted with key-A on Complete — the client thinks
3794        // part 2 is under key-B but a GET with key-B would in fact
3795        // hit the part-1 ciphertext that was actually encrypted with
3796        // key-A. That would either decrypt successfully (silent
3797        // corruption: client lost track of which key encrypts what)
3798        // or fail in a confusing way. Validate the per-part headers
3799        // now and reject with 400 InvalidArgument on mismatch /
3800        // omission / partial supply, matching real-S3 behaviour.
3801        if let Some(ref ctx) = sse_ctx {
3802            if let crate::multipart_state::MultipartSseMode::SseC {
3803                key_md5: ctx_md5, ..
3804            } = &ctx.sse
3805            {
3806                let alg = req.input.sse_customer_algorithm.take();
3807                let key_b64 = req.input.sse_customer_key.take();
3808                let md5_b64 = req.input.sse_customer_key_md5.take();
3809                match (alg, key_b64, md5_b64) {
3810                    (Some(a), Some(k), Some(m)) => {
3811                        // Parse + validate; if the per-part headers
3812                        // are themselves malformed (algorithm not
3813                        // AES256, MD5 mismatch, key not 32 bytes)
3814                        // surface the same 400 the single-PUT path
3815                        // would. Then compare the parsed MD5 to the
3816                        // upload-context's MD5; mismatch is a
3817                        // different-key UploadPart and must reject.
3818                        let part_material = crate::sse::parse_customer_key_headers(&a, &k, &m)
3819                            .map_err(sse_c_error_to_s3)?;
3820                        if part_material.key_md5 != *ctx_md5 {
3821                            return Err(S3Error::with_message(
3822                                S3ErrorCode::InvalidArgument,
3823                                "SSE-C key on UploadPart does not match the key supplied on CreateMultipartUpload",
3824                            ));
3825                        }
3826                        // OK — same key as Create. Headers are
3827                        // already taken off `req.input` so the
3828                        // backend never sees them.
3829                    }
3830                    (None, None, None) => {
3831                        // AWS S3 spec: SSE-C headers MUST be replayed
3832                        // on every UploadPart of an SSE-C multipart.
3833                        // Real-S3 returns 400 InvalidRequest in this
3834                        // case; mirror that.
3835                        return Err(S3Error::with_message(
3836                            S3ErrorCode::InvalidRequest,
3837                            "SSE-C requires customer-key headers on every UploadPart (CreateMultipartUpload was SSE-C)",
3838                        ));
3839                    }
3840                    _ => {
3841                        // Partial header set (e.g. algorithm + key
3842                        // but no MD5) — same handling as the
3843                        // single-PUT `extract_sse_c_material` helper.
3844                        return Err(S3Error::with_message(
3845                            S3ErrorCode::InvalidRequest,
3846                            "SSE-C requires all three of: x-amz-server-side-encryption-customer-{algorithm,key,key-MD5}",
3847                        ));
3848                    }
3849                }
3850            } else {
3851                // CreateMultipartUpload was non-SSE-C (None / SseS4 /
3852                // SseKms). A part that arrives carrying SSE-C headers
3853                // is either a confused client or an attempt to
3854                // smuggle SSE-C around the gateway-internal SSE
3855                // recipe. Reject with 400 InvalidRequest rather than
3856                // silently strip — the strip would let the client
3857                // believe the part was encrypted under their key
3858                // when in fact the upload's encryption recipe is
3859                // whatever the Create captured.
3860                if req.input.sse_customer_algorithm.is_some()
3861                    || req.input.sse_customer_key.is_some()
3862                    || req.input.sse_customer_key_md5.is_some()
3863                {
3864                    return Err(S3Error::with_message(
3865                        S3ErrorCode::InvalidRequest,
3866                        "UploadPart sent SSE-C headers but CreateMultipartUpload was not SSE-C",
3867                    ));
3868                }
3869            }
3870        } else {
3871            // No upload context registered (gateway crashed between
3872            // Create and Part, or pre-#62 abandoned-upload restore).
3873            // We can't check key consistency in this case — strip
3874            // the headers and let the request through unchanged so
3875            // the backend's `NoSuchUpload` reply (or whatever it
3876            // chooses to do) flows back to the client.
3877            let _ = req.input.sse_customer_algorithm.take();
3878            let _ = req.input.sse_customer_key.take();
3879            let _ = req.input.sse_customer_key_md5.take();
3880        }
3881        let _sse_ctx = sse_ctx;
3882        if let Some(blob) = req.input.body.take() {
3883            let bytes = collect_blob(blob, self.max_body_bytes)
3884                .await
3885                .map_err(internal("collect upload_part body"))?;
3886            let sample_len = bytes.len().min(SAMPLE_BYTES);
3887            // v0.8 #56: full part body is already in memory here; use its
3888            // length as the size hint so the dispatcher can promote to GPU
3889            // if it's big enough.
3890            let codec_kind = self
3891                .dispatcher
3892                .pick_with_size_hint(&bytes[..sample_len], Some(bytes.len() as u64))
3893                .await;
3894            let original_size = bytes.len() as u64;
3895            // v0.8 #55: telemetry-returning compress (GPU metrics stamp).
3896            let (compress_res, tel) = self
3897                .registry
3898                .compress_with_telemetry(bytes, codec_kind)
3899                .await;
3900            stamp_gpu_compress_telemetry(&tel);
3901            let (compressed, manifest) =
3902                compress_res.map_err(internal("registry compress part"))?;
3903            let header = FrameHeader {
3904                codec: codec_kind,
3905                original_size,
3906                compressed_size: compressed.len() as u64,
3907                crc32c: manifest.crc32c,
3908            };
3909            let mut framed = BytesMut::with_capacity(FRAME_HEADER_BYTES + compressed.len());
3910            write_frame(&mut framed, header, &compressed);
3911            // v0.2 #5: heuristic-based padding skip for likely-final parts.
3912            //
3913            // AWS SDK / aws-cli / boto3 always send the final (and only the
3914            // final) part below the configured part_size. So if the raw user
3915            // part is already smaller than S3's 5 MiB multipart minimum, this
3916            // is overwhelmingly likely to be the final part — and the final
3917            // part is exempt from S3's size constraint. Skipping padding here
3918            // saves up to ~5 MiB per object on highly compressible workloads.
3919            //
3920            // If a misbehaving client sends a tiny **non-final** part, S3
3921            // itself rejects with EntityTooSmall at CompleteMultipartUpload —
3922            // identical outcome to a vanilla S3 PUT, just earlier than
3923            // padding-then-complete would catch it.
3924            let likely_final = original_size < S3_MULTIPART_MIN_PART_BYTES as u64;
3925            if !likely_final {
3926                pad_to_minimum(&mut framed, S3_MULTIPART_MIN_PART_BYTES);
3927            }
3928            let framed_bytes = framed.freeze();
3929            let new_len = framed_bytes.len() as i64;
3930            // 同じ wire 互換問題が multipart にもある (content-length / checksum)
3931            req.input.content_length = Some(new_len);
3932            req.input.checksum_algorithm = None;
3933            req.input.checksum_crc32 = None;
3934            req.input.checksum_crc32c = None;
3935            req.input.checksum_crc64nvme = None;
3936            req.input.checksum_sha1 = None;
3937            req.input.checksum_sha256 = None;
3938            req.input.content_md5 = None;
3939            req.input.body = Some(bytes_to_blob(framed_bytes));
3940            debug!(
3941                part_number = ?req.input.part_number,
3942                upload_id = ?req.input.upload_id,
3943                original_size,
3944                framed_size = new_len,
3945                "S4 upload_part: framed compressed payload"
3946            );
3947        }
3948        self.backend.upload_part(req).await
3949    }
3950    async fn complete_multipart_upload(
3951        &self,
3952        mut req: S3Request<CompleteMultipartUploadInput>,
3953    ) -> S3Result<S3Response<CompleteMultipartUploadOutput>> {
3954        let bucket = req.input.bucket.clone();
3955        let key = req.input.key.clone();
3956        let upload_id = req.input.upload_id.clone();
3957        // v0.8.1 #59: serialise concurrent Complete invocations on the
3958        // same `(bucket, key)`. The race window the lock closes is the
3959        // GET-assembled-body → encrypt → PUT-encrypted-body triple
3960        // below (BUG-5 fix); without serialisation, two Completes for
3961        // different `upload_id` but the same logical key could each
3962        // read the other's plaintext assembled body and overwrite the
3963        // peer's encrypted result. The guard is held to function exit
3964        // (drop on `Ok` / `Err`), covering version-id mint, object-
3965        // lock apply, tagging persist, and replication enqueue too.
3966        let completion_lock = self.multipart_state.completion_lock(&bucket, &key);
3967        let _completion_guard = completion_lock.lock().await;
3968        // v0.8 #54 — fetch the per-upload context captured on Create.
3969        // `None` means an abandoned / unknown upload_id (gateway
3970        // crashed between Create and Complete, or pre-v0.8 state
3971        // restore); we still let the backend do its thing for
3972        // transparency, but we can't apply any SSE / version / lock /
3973        // tag / replication post-processing because we never captured
3974        // the recipe.
3975        let ctx = self.multipart_state.get(upload_id.as_str());
3976        // v0.8 #54 BUG-10 fix: same SSE-C header strip as upload_part
3977        // — some clients (boto3 / aws-sdk-cpp older versions) replay
3978        // the SSE-C triple on Complete too, and MinIO will choke if
3979        // they reach the backend.
3980        let _ = req.input.sse_customer_algorithm.take();
3981        let _ = req.input.sse_customer_key.take();
3982        let _ = req.input.sse_customer_key_md5.take();
3983        let mut resp = self.backend.complete_multipart_upload(req).await?;
3984        // CompleteMultipartUpload 成功 → 完成した object を full fetch して frame
3985        // index を build、`<key>.s4index` sidecar として保存。これで Range GET の
3986        // partial fetch path が利用可能になる (Range request の帯域節約)。
3987        // 注: 巨大 object の場合この pass は重いが、Range query は一度 sidecar が
3988        // できれば爆速になるので 1 回の cost は payback される
3989        //
3990        // v0.8 #54 BUG-5..9: this same fetch is the choke-point for
3991        // the SSE encrypt re-PUT + versioning shadow-key rewrite +
3992        // replication source-bytes capture, so we GET once and reuse
3993        // the bytes for every post-processing step.
3994        let assembled_body: Option<bytes::Bytes> = if let Ok(uri) = safe_object_uri(&bucket, &key) {
3995            let get_input = GetObjectInput {
3996                bucket: bucket.clone(),
3997                key: key.clone(),
3998                ..Default::default()
3999            };
4000            let get_req = S3Request {
4001                input: get_input,
4002                method: http::Method::GET,
4003                uri,
4004                headers: http::HeaderMap::new(),
4005                extensions: http::Extensions::new(),
4006                credentials: None,
4007                region: None,
4008                service: None,
4009                trailing_headers: None,
4010            };
4011            match self.backend.get_object(get_req).await {
4012                Ok(get_resp) => match get_resp.output.body {
4013                    Some(blob) => collect_blob(blob, self.max_body_bytes).await.ok(),
4014                    None => None,
4015                },
4016                Err(e) => {
4017                    // v0.8.4 #71 (C-1 audit fix): a silent
4018                    // `Err(_) => None` here is a SSE plaintext
4019                    // leak. The post-processing block below only
4020                    // runs the SSE re-encrypt branch when
4021                    // `assembled_body.is_some()`, so swallowing a
4022                    // backend error skipped the encrypt step and
4023                    // left the multipart object on disk as
4024                    // plaintext, even on SSE-S4 / SSE-C / SSE-KMS
4025                    // configured buckets. Same root-cause family
4026                    // as v0.8 BUG-5; this branch closes the
4027                    // remaining read-side window.
4028                    //
4029                    // We distinguish two cases:
4030                    //  - `NoSuchKey`: the object is genuinely
4031                    //    missing post-Complete. This is rare and
4032                    //    typically races with a concurrent
4033                    //    DeleteObject; there is nothing to re-
4034                    //    encrypt and no SSE markers to honour, so
4035                    //    falling through to the legacy
4036                    //    `assembled_body = None` path is safe.
4037                    //  - everything else (5xx, network, auth,
4038                    //    etc.): we must FAIL the Complete so the
4039                    //    client can retry. Returning Ok with
4040                    //    `assembled_body = None` would silently
4041                    //    skip the SSE re-encrypt and leave the
4042                    //    backend bytes plaintext.
4043                    if matches!(e.code(), &S3ErrorCode::NoSuchKey) {
4044                        tracing::warn!(
4045                            bucket = %bucket,
4046                            key = %key,
4047                            "multipart Complete: backend GET returned NoSuchKey; \
4048                             skipping post-processing (object likely raced with DeleteObject)"
4049                        );
4050                        None
4051                    } else {
4052                        tracing::error!(
4053                            bucket = %bucket,
4054                            key = %key,
4055                            error = %e,
4056                            "multipart Complete: backend GET failed; failing the Complete \
4057                             so the client retries (silent fall-through would skip SSE \
4058                             re-encrypt and store plaintext)"
4059                        );
4060                        return Err(internal("multipart Complete: backend body fetch failed")(e));
4061                    }
4062                }
4063            }
4064        } else {
4065            None
4066        };
4067        // Sidecar build (existing behaviour, gated on assembled body).
4068        if let Some(ref body) = assembled_body
4069            && let Ok(index) = build_index_from_body(body)
4070        {
4071            self.write_sidecar(&bucket, &key, &index).await;
4072        }
4073        // From here on, post-processing depends on the context —
4074        // short-circuit when the upload had no captured recipe
4075        // (legacy / crashed-Create / pre-v0.8 state restore).
4076        if let Some(ctx) = ctx {
4077            // v0.8 #54 BUG-6 fix: mint a version-id when the bucket
4078            // is versioning-Enabled. The single-PUT path does this in
4079            // `put_object` ~L1968; multipart was the missing branch.
4080            // We mint here (post-Complete, before any re-PUT) so the
4081            // same vid threads into both the shadow-key rewrite and
4082            // the VersionEntry the manager records.
4083            let pending_version: Option<crate::versioning::PutOutcome> = self
4084                .versioning
4085                .as_ref()
4086                .map(|mgr| mgr.state(&bucket))
4087                .map(|state| match state {
4088                    crate::versioning::VersioningState::Enabled => crate::versioning::PutOutcome {
4089                        version_id: crate::versioning::VersioningManager::new_version_id(),
4090                        versioned_response: true,
4091                    },
4092                    crate::versioning::VersioningState::Suspended
4093                    | crate::versioning::VersioningState::Unversioned => {
4094                        crate::versioning::PutOutcome {
4095                            version_id: crate::versioning::NULL_VERSION_ID.to_owned(),
4096                            versioned_response: false,
4097                        }
4098                    }
4099                });
4100            // v0.8 #54 BUG-5 fix: encrypt the assembled framed body
4101            // and re-PUT it to the backend so the on-disk bytes are
4102            // SSE-encrypted. The single-PUT path does this body-by-
4103            // body inside `put_object` (L1907-L1942); for multipart,
4104            // encrypt-per-part would require a multi-segment decrypt
4105            // path on GET — we instead do a single encrypt over the
4106            // assembled framed body so the existing GET decrypt
4107            // branch (`is_sse_encrypted` → `decrypt(body, source)` →
4108            // FrameIter) handles it unchanged.
4109            //
4110            // The cost is one extra round-trip per Complete for SSE-
4111            // enabled multipart (already-paid for the sidecar build).
4112            // For single-instance gateways pointing at a co-located
4113            // backend this is negligible; cross-region operators
4114            // would benefit from per-part encrypt + multi-segment
4115            // decrypt as a follow-up.
4116            let needs_re_put = matches!(
4117                ctx.sse,
4118                crate::multipart_state::MultipartSseMode::SseS4
4119                    | crate::multipart_state::MultipartSseMode::SseC { .. }
4120                    | crate::multipart_state::MultipartSseMode::SseKms { .. }
4121            ) || pending_version
4122                .as_ref()
4123                .map(|pv| pv.versioned_response)
4124                .unwrap_or(false);
4125            // Snapshot replication body in advance so we can pass it
4126            // to the spawn helper after the (possibly absent) re-PUT.
4127            let replication_body = assembled_body.clone();
4128            let mut applied_metadata: Option<std::collections::HashMap<String, String>> = None;
4129            if needs_re_put && let Some(body) = assembled_body {
4130                // v0.8.1 #58: same Zeroizing pattern as put_object's
4131                // single-PUT KMS branch — DEK plaintext lives in
4132                // `Zeroizing<[u8; 32]>` for the lifetime of this
4133                // Complete handler, then is wiped on drop.
4134                let kms_wrap: Option<(zeroize::Zeroizing<[u8; 32]>, crate::kms::WrappedDek)> =
4135                    if let crate::multipart_state::MultipartSseMode::SseKms { ref key_id } = ctx.sse
4136                    {
4137                        let kms = self.kms.as_ref().ok_or_else(|| {
4138                        S3Error::with_message(
4139                            S3ErrorCode::InvalidRequest,
4140                            "SSE-KMS requested but no --kms-local-dir / --kms-aws-region is configured on this gateway",
4141                        )
4142                    })?;
4143                        let (dek, wrapped) =
4144                            kms.generate_dek(key_id).await.map_err(kms_error_to_s3)?;
4145                        if dek.len() != 32 {
4146                            return Err(S3Error::with_message(
4147                                S3ErrorCode::InternalError,
4148                                format!(
4149                                    "KMS backend returned a DEK of {} bytes (expected 32)",
4150                                    dek.len()
4151                                ),
4152                            ));
4153                        }
4154                        let mut dek_arr: zeroize::Zeroizing<[u8; 32]> =
4155                            zeroize::Zeroizing::new([0u8; 32]);
4156                        dek_arr.copy_from_slice(&dek);
4157                        // `dek` (Zeroizing<Vec<u8>>) is dropped at scope end.
4158                        Some((dek_arr, wrapped))
4159                    } else {
4160                        None
4161                    };
4162                // Build the new metadata map: re-fetch via HEAD so
4163                // the multipart / codec markers the backend stamped
4164                // on Create flow through unchanged, then layer the
4165                // SSE markers on top.
4166                let head_req = S3Request {
4167                    input: HeadObjectInput {
4168                        bucket: bucket.clone(),
4169                        key: key.clone(),
4170                        ..Default::default()
4171                    },
4172                    method: http::Method::HEAD,
4173                    uri: safe_object_uri(&bucket, &key)?,
4174                    headers: http::HeaderMap::new(),
4175                    extensions: http::Extensions::new(),
4176                    credentials: None,
4177                    region: None,
4178                    service: None,
4179                    trailing_headers: None,
4180                };
4181                let mut new_metadata: std::collections::HashMap<String, String> =
4182                    match self.backend.head_object(head_req).await {
4183                        Ok(h) => h.output.metadata.unwrap_or_default(),
4184                        Err(_) => std::collections::HashMap::new(),
4185                    };
4186                let new_body = match &ctx.sse {
4187                    crate::multipart_state::MultipartSseMode::SseC { key, key_md5 } => {
4188                        new_metadata.insert("s4-encrypted".into(), "aes-256-gcm".into());
4189                        new_metadata.insert("s4-sse-type".into(), "AES256".into());
4190                        new_metadata.insert(
4191                            "s4-sse-c-key-md5".into(),
4192                            base64::engine::general_purpose::STANDARD.encode(key_md5),
4193                        );
4194                        // v0.8.2 #62: `key` is `&Zeroizing<[u8; 32]>`;
4195                        // auto-deref through one explicit binding so
4196                        // `SseSource::CustomerKey` gets the `&[u8; 32]`
4197                        // it expects (mirrors the SSE-KMS DEK shape
4198                        // a few lines down).
4199                        let key_ref: &[u8; 32] = key;
4200                        crate::sse::encrypt_with_source(
4201                            &body,
4202                            crate::sse::SseSource::CustomerKey {
4203                                key: key_ref,
4204                                key_md5,
4205                            },
4206                        )
4207                    }
4208                    crate::multipart_state::MultipartSseMode::SseKms { .. } => {
4209                        let (dek, wrapped) = kms_wrap
4210                            .as_ref()
4211                            .expect("SseKms branch implies kms_wrap is Some");
4212                        new_metadata.insert("s4-encrypted".into(), "aes-256-gcm".into());
4213                        new_metadata.insert("s4-sse-type".into(), "aws:kms".into());
4214                        new_metadata.insert("s4-sse-kms-key-id".into(), wrapped.key_id.clone());
4215                        // v0.8.1 #58: auto-deref from `&Zeroizing<[u8; 32]>`
4216                        // to `&[u8; 32]` (same shape as the put_object
4217                        // single-PUT branch).
4218                        let dek_ref: &[u8; 32] = dek;
4219                        crate::sse::encrypt_with_source(
4220                            &body,
4221                            crate::sse::SseSource::Kms {
4222                                dek: dek_ref,
4223                                wrapped,
4224                            },
4225                        )
4226                    }
4227                    crate::multipart_state::MultipartSseMode::SseS4 => {
4228                        let keyring = self.sse_keyring.as_ref().ok_or_else(|| {
4229                            S3Error::with_message(
4230                                S3ErrorCode::InternalError,
4231                                "SSE-S4 captured at Create but keyring missing at Complete",
4232                            )
4233                        })?;
4234                        new_metadata.insert("s4-encrypted".into(), "aes-256-gcm".into());
4235                        // SSE-S4 deliberately omits `s4-sse-type` so
4236                        // HEAD doesn't falsely advertise AWS-style
4237                        // SSE-S3 (matches the put_object L1929-L1939
4238                        // comment).
4239                        // v0.8 #52: same chunk_size dispatch as the
4240                        // single-PUT branch — multipart Complete
4241                        // re-encrypts the assembled body, so honoring
4242                        // the chunked path here is required to keep
4243                        // GET streaming on multipart-uploaded objects.
4244                        if self.sse_chunk_size > 0 {
4245                            crate::sse::encrypt_v2_chunked(&body, keyring, self.sse_chunk_size)
4246                                .map_err(|e| {
4247                                    S3Error::with_message(
4248                                        S3ErrorCode::InternalError,
4249                                        format!("SSE-S4 chunked encrypt failed at Complete: {e}"),
4250                                    )
4251                                })?
4252                        } else {
4253                            crate::sse::encrypt_v2(&body, keyring)
4254                        }
4255                    }
4256                    crate::multipart_state::MultipartSseMode::None => body.clone(),
4257                };
4258                // v0.8 #54 BUG-6 fix: write the re-PUT under the
4259                // shadow key so the version chain doesn't overwrite
4260                // the previous version on a versioned bucket. The
4261                // original (unshadowed) key was assembled by the
4262                // backend on Complete; we delete it after the shadow
4263                // PUT lands.
4264                let put_target_key = if let Some(pv) = pending_version.as_ref() {
4265                    if pv.versioned_response {
4266                        versioned_shadow_key(&key, &pv.version_id)
4267                    } else {
4268                        key.clone()
4269                    }
4270                } else {
4271                    key.clone()
4272                };
4273                let new_body_len = new_body.len() as i64;
4274                let put_req = S3Request {
4275                    input: PutObjectInput {
4276                        bucket: bucket.clone(),
4277                        key: put_target_key.clone(),
4278                        body: Some(bytes_to_blob(new_body.clone())),
4279                        metadata: Some(new_metadata.clone()),
4280                        content_length: Some(new_body_len),
4281                        ..Default::default()
4282                    },
4283                    method: http::Method::PUT,
4284                    uri: safe_object_uri(&bucket, &put_target_key)?,
4285                    headers: http::HeaderMap::new(),
4286                    extensions: http::Extensions::new(),
4287                    credentials: None,
4288                    region: None,
4289                    service: None,
4290                    trailing_headers: None,
4291                };
4292                self.backend.put_object(put_req).await?;
4293                // If we rewrote the storage key (versioning shadow),
4294                // we must drop the original (unshadowed) Complete-
4295                // assembled bytes so subsequent listings don't see a
4296                // duplicate.
4297                if put_target_key != key {
4298                    let del_req = S3Request {
4299                        input: DeleteObjectInput {
4300                            bucket: bucket.clone(),
4301                            key: key.clone(),
4302                            ..Default::default()
4303                        },
4304                        method: http::Method::DELETE,
4305                        uri: safe_object_uri(&bucket, &key)?,
4306                        headers: http::HeaderMap::new(),
4307                        extensions: http::Extensions::new(),
4308                        credentials: None,
4309                        region: None,
4310                        service: None,
4311                        trailing_headers: None,
4312                    };
4313                    let _ = self.backend.delete_object(del_req).await;
4314                }
4315                applied_metadata = Some(new_metadata);
4316            }
4317            // v0.8 #54 BUG-6 commit: register the new version with
4318            // the VersioningManager so list_object_versions /
4319            // GET ?versionId= see it.
4320            if let (Some(mgr), Some(pv)) = (self.versioning.as_ref(), pending_version.as_ref()) {
4321                let etag = resp
4322                    .output
4323                    .e_tag
4324                    .clone()
4325                    .map(ETag::into_value)
4326                    .unwrap_or_default();
4327                let now = chrono::Utc::now();
4328                mgr.commit_put_with_version(
4329                    &bucket,
4330                    &key,
4331                    crate::versioning::VersionEntry {
4332                        version_id: pv.version_id.clone(),
4333                        etag,
4334                        size: replication_body
4335                            .as_ref()
4336                            .map(|b| b.len() as u64)
4337                            .unwrap_or(0),
4338                        is_delete_marker: false,
4339                        created_at: now,
4340                    },
4341                );
4342                if pv.versioned_response {
4343                    resp.output.version_id = Some(pv.version_id.clone());
4344                }
4345            }
4346            // v0.8 #54 BUG-7 fix: persist any per-upload Object Lock
4347            // recipe + auto-apply the bucket default. Mirrors the
4348            // put_object L2057-L2074 block.
4349            if let Some(mgr) = self.object_lock.as_ref() {
4350                if ctx.object_lock_mode.is_some()
4351                    || ctx.object_lock_retain_until.is_some()
4352                    || ctx.object_lock_legal_hold
4353                {
4354                    let mut state = mgr.get(&bucket, &key).unwrap_or_default();
4355                    if let Some(m) = ctx.object_lock_mode {
4356                        state.mode = Some(m);
4357                    }
4358                    if let Some(u) = ctx.object_lock_retain_until {
4359                        state.retain_until = Some(u);
4360                    }
4361                    if ctx.object_lock_legal_hold {
4362                        state.legal_hold_on = true;
4363                    }
4364                    mgr.set(&bucket, &key, state);
4365                }
4366                mgr.apply_default_on_put(&bucket, &key, chrono::Utc::now());
4367            }
4368            // v0.8 #54 BUG-9 fix: persist the captured tags via the
4369            // TagManager so GetObjectTagging returns them.
4370            if let (Some(mgr), Some(tags)) = (self.tagging.as_ref(), ctx.tags.as_ref()) {
4371                mgr.put_object_tags(&bucket, &key, tags.clone());
4372            }
4373            // SSE-C / SSE-KMS response echo. The
4374            // CompleteMultipartUploadOutput only exposes
4375            // `server_side_encryption` + `ssekms_key_id` (no
4376            // sse_customer_* — those round-tripped on Create / parts).
4377            match &ctx.sse {
4378                crate::multipart_state::MultipartSseMode::SseC { .. } => {
4379                    resp.output.server_side_encryption = Some(ServerSideEncryption::from_static(
4380                        ServerSideEncryption::AES256,
4381                    ));
4382                }
4383                crate::multipart_state::MultipartSseMode::SseKms { key_id } => {
4384                    resp.output.server_side_encryption = Some(ServerSideEncryption::from_static(
4385                        ServerSideEncryption::AWS_KMS,
4386                    ));
4387                    resp.output.ssekms_key_id = Some(key_id.clone());
4388                }
4389                _ => {}
4390            }
4391            // v0.8 #54 BUG-8 fix: fire cross-bucket replication just
4392            // like put_object L2165 does. We hand the dispatcher the
4393            // assembled body bytes (post-encrypt where applicable, so
4394            // the destination ends up byte-identical to the source's
4395            // on-disk shape) plus the metadata that was actually
4396            // committed.
4397            let replication_body_bytes = replication_body.unwrap_or_default();
4398            // v0.8.2 #61: thread the multipart-Complete `pending_version`
4399            // through so a versioning-Enabled source's destination
4400            // receives the same shadow-key path (mirror of the
4401            // single-PUT branch above).
4402            self.spawn_replication_if_matched(
4403                &bucket,
4404                &key,
4405                &ctx.tags,
4406                &replication_body_bytes,
4407                &applied_metadata,
4408                true,
4409                pending_version.as_ref(),
4410            );
4411            self.multipart_state.remove(upload_id.as_str());
4412        }
4413        // v0.8.1 #59 janitor: best-effort sweep of stale completion
4414        // locks while we are still on the critical path of a single
4415        // Complete (so steady-state workloads of unique keys don't
4416        // accumulate `DashMap` entries). The sweep only retires
4417        // entries whose `Arc::strong_count == 1`, so any other in-
4418        // flight Complete on a different key keeps its lock alive.
4419        // Our own `_completion_guard` keeps `bucket`/`key`'s entry
4420        // alive across this call; it's reaped on the next Complete or
4421        // the next caller-driven prune.
4422        self.multipart_state.prune_completion_locks();
4423        Ok(resp)
4424    }
4425    async fn abort_multipart_upload(
4426        &self,
4427        req: S3Request<AbortMultipartUploadInput>,
4428    ) -> S3Result<S3Response<AbortMultipartUploadOutput>> {
4429        // v0.8 #54: drop the per-upload state (SSE-C key bytes / tag
4430        // set) promptly so an aborted upload doesn't leak the
4431        // customer's key into a long-running gateway's RSS.
4432        //
4433        // v0.8.4 #71 (H-7 audit fix): backend.abort_multipart_upload
4434        // FIRST, then drop in-process state ONLY on success. The
4435        // previous order ("remove → call backend") meant a transient
4436        // backend abort failure (5xx, network) wiped the SSE-C key
4437        // bytes locally while leaving the parts on the backend, so a
4438        // client retry would have to re-validate the SSE-C key against
4439        // a context the gateway no longer has — and the retried abort
4440        // would still hit the unaborted backend parts. Calling the
4441        // backend first lets the failure propagate to the client with
4442        // state intact for a clean retry; only on success do we wipe
4443        // the local state.
4444        let upload_id = req.input.upload_id.as_str().to_owned();
4445        let resp = self.backend.abort_multipart_upload(req).await?;
4446        self.multipart_state.remove(&upload_id);
4447        Ok(resp)
4448    }
    /// Pure passthrough: S4 attaches no compression hook to
    /// `ListMultipartUploads`, so the backend's behaviour is exposed
    /// unchanged.
    async fn list_multipart_uploads(
        &self,
        req: S3Request<ListMultipartUploadsInput>,
    ) -> S3Result<S3Response<ListMultipartUploadsOutput>> {
        self.backend.list_multipart_uploads(req).await
    }
    /// Pure passthrough: `ListParts` is delegated verbatim to the
    /// backend. NOTE(review): since parts are stored compressed/framed,
    /// the sizes reported here are presumably the on-wire (framed)
    /// sizes rather than original byte counts — confirm clients
    /// tolerate that.
    async fn list_parts(
        &self,
        req: S3Request<ListPartsInput>,
    ) -> S3Result<S3Response<ListPartsOutput>> {
        self.backend.list_parts(req).await
    }
4461
4462    // =========================================================================
4463    // Phase 2 — pure passthrough delegations。S4 はこれらに対して圧縮 hook を
4464    // 持たないので、backend (= AWS S3) の動作と完全に同一。
4465    //
4466    // 既知の制限事項:
4467    // - copy_object / upload_part_copy: source object が S4-compressed の場合、
4468    //   backend が bytes を copy するだけなので metadata (s4-codec etc) も一緒に
    //   copied される (AWS S3 default = MetadataDirective COPY)。GET は manifest
4470    //   経由で正しく decompress できる。MetadataDirective REPLACE で上書き
4471    //   されると圧縮 metadata が消えて壊れる — 顧客側の運用で注意
4472    // - list_object_versions: versioning enabled bucket では各 version も S4
4473    //   metadata を維持する。古い version も S4 経由で正しく GET できる。
4474    // =========================================================================
4475
4476    // ---- Object ACL / tagging / attributes ----
    /// Pure passthrough: object ACL reads are delegated verbatim to the
    /// backend (no S4-side ACL state).
    async fn get_object_acl(
        &self,
        req: S3Request<GetObjectAclInput>,
    ) -> S3Result<S3Response<GetObjectAclOutput>> {
        self.backend.get_object_acl(req).await
    }
    /// Pure passthrough: object ACL writes are delegated verbatim to
    /// the backend (no S4-side ACL state).
    async fn put_object_acl(
        &self,
        req: S3Request<PutObjectAclInput>,
    ) -> S3Result<S3Response<PutObjectAclOutput>> {
        self.backend.put_object_acl(req).await
    }
4489    // v0.6 #39: object tagging — when a `TagManager` is attached the
4490    // configuration / per-(bucket, key) state lives in the manager and
4491    // these handlers serve directly from it; when no manager is
4492    // attached they fall back to the backend (legacy passthrough so
4493    // v0.5 deployments are unaffected).
4494    async fn get_object_tagging(
4495        &self,
4496        req: S3Request<GetObjectTaggingInput>,
4497    ) -> S3Result<S3Response<GetObjectTaggingOutput>> {
4498        let Some(mgr) = self.tagging.as_ref() else {
4499            return self.backend.get_object_tagging(req).await;
4500        };
4501        let tags = mgr
4502            .get_object_tags(&req.input.bucket, &req.input.key)
4503            .unwrap_or_default();
4504        Ok(S3Response::new(GetObjectTaggingOutput {
4505            tag_set: tagset_to_aws(&tags),
4506            ..Default::default()
4507        }))
4508    }
4509    async fn put_object_tagging(
4510        &self,
4511        req: S3Request<PutObjectTaggingInput>,
4512    ) -> S3Result<S3Response<PutObjectTaggingOutput>> {
4513        let Some(mgr) = self.tagging.as_ref() else {
4514            return self.backend.put_object_tagging(req).await;
4515        };
4516        let bucket = req.input.bucket.clone();
4517        let key = req.input.key.clone();
4518        let parsed = aws_to_tagset(&req.input.tagging.tag_set)
4519            .map_err(|e| S3Error::with_message(S3ErrorCode::InvalidArgument, e.to_string()))?;
4520        // v0.6 #39: gate via IAM policy with both the request tags
4521        // (`s3:RequestObjectTag/<key>`) and any existing tags on the
4522        // target object (`s3:ExistingObjectTag/<key>`).
4523        let existing = mgr.get_object_tags(&bucket, &key);
4524        self.enforce_policy_with_extra(
4525            &req,
4526            "s3:PutObjectTagging",
4527            &bucket,
4528            Some(&key),
4529            Some(&parsed),
4530            existing.as_ref(),
4531        )?;
4532        mgr.put_object_tags(&bucket, &key, parsed);
4533        Ok(S3Response::new(PutObjectTaggingOutput::default()))
4534    }
4535    async fn delete_object_tagging(
4536        &self,
4537        req: S3Request<DeleteObjectTaggingInput>,
4538    ) -> S3Result<S3Response<DeleteObjectTaggingOutput>> {
4539        let Some(mgr) = self.tagging.as_ref() else {
4540            return self.backend.delete_object_tagging(req).await;
4541        };
4542        let bucket = req.input.bucket.clone();
4543        let key = req.input.key.clone();
4544        let existing = mgr.get_object_tags(&bucket, &key);
4545        self.enforce_policy_with_extra(
4546            &req,
4547            "s3:DeleteObjectTagging",
4548            &bucket,
4549            Some(&key),
4550            None,
4551            existing.as_ref(),
4552        )?;
4553        mgr.delete_object_tags(&bucket, &key);
4554        Ok(S3Response::new(DeleteObjectTaggingOutput::default()))
4555    }
    /// Pure passthrough to the backend. NOTE(review): attributes such
    /// as ObjectSize presumably reflect the stored (compressed) object,
    /// not the original bytes — confirm whether Phase 2 should rewrite
    /// them from the S4 manifest.
    async fn get_object_attributes(
        &self,
        req: S3Request<GetObjectAttributesInput>,
    ) -> S3Result<S3Response<GetObjectAttributesOutput>> {
        self.backend.get_object_attributes(req).await
    }
    /// Pure passthrough: Glacier-style restore requests are delegated
    /// verbatim to the backend.
    async fn restore_object(
        &self,
        req: S3Request<RestoreObjectInput>,
    ) -> S3Result<S3Response<RestoreObjectOutput>> {
        self.backend.restore_object(req).await
    }
    /// `UploadPartCopy` with S4-frame awareness (v0.2 #6).
    ///
    /// Framed sources are fetched through S4's own GET path (which
    /// performs decompression + Range handling), then re-compressed and
    /// re-framed as a fresh part; non-framed sources take the cheap
    /// backend passthrough.
    async fn upload_part_copy(
        &self,
        req: S3Request<UploadPartCopyInput>,
    ) -> S3Result<S3Response<UploadPartCopyOutput>> {
        // v0.2 #6: byte-range aware copy when the source is S4-framed.
        //
        // For a framed source (multipart upload OR single-PUT framed-v2),
        // a naive byte-range passthrough would copy compressed bytes that
        // don't align with S4 frame boundaries — silently corrupting the
        // result. Instead we GET the source through S4 (which handles
        // decompression + Range), re-compress + re-frame as a new part,
        // and forward as upload_part. For non-framed sources (S4-untouched
        // raw objects), passthrough is correct and we keep the original
        // (cheaper) code path.
        // v0.8.4 #74: propagate the optional `?versionId=<vid>` from the
        // copy-source header. Without this, a versioned source bucket
        // copy that pins a specific old version would silently fall
        // back to "latest", assembling wrong bytes into the destination
        // multipart object (silent data corruption).
        // Non-`Bucket` copy-source forms carry no (bucket, key) we can
        // probe, so they go straight to the backend.
        let CopySource::Bucket {
            bucket: src_bucket,
            key: src_key,
            version_id: src_version_id,
        } = &req.input.copy_source
        else {
            return self.backend.upload_part_copy(req).await;
        };
        // Own the source coordinates so `req` can be moved into the
        // passthrough / forwarded calls below.
        let src_bucket = src_bucket.to_string();
        let src_key = src_key.to_string();
        let src_version_id: Option<String> = src_version_id.as_deref().map(str::to_owned);

        // Probe metadata to decide whether the source needs S4-aware copy.
        let head_input = HeadObjectInput {
            bucket: src_bucket.clone(),
            key: src_key.clone(),
            version_id: src_version_id.clone(),
            ..Default::default()
        };
        let head_req = S3Request {
            input: head_input,
            method: http::Method::HEAD,
            uri: req.uri.clone(),
            headers: req.headers.clone(),
            extensions: http::Extensions::new(),
            credentials: req.credentials.clone(),
            region: req.region.clone(),
            service: req.service.clone(),
            trailing_headers: None,
        };
        let needs_s4_copy = match self.backend.head_object(head_req).await {
            Ok(h) => {
                is_multipart_object(&h.output.metadata) || is_framed_v2_object(&h.output.metadata)
            }
            // HEAD failure → treat the source as non-framed and fall
            // through to passthrough; the backend's own copy call will
            // surface the real error (e.g. NoSuchKey) to the client.
            Err(_) => false,
        };
        if !needs_s4_copy {
            return self.backend.upload_part_copy(req).await;
        }

        // Resolve the optional source byte range to pass to GET.
        let source_range = req
            .input
            .copy_source_range
            .as_ref()
            .map(|r| parse_copy_source_range(r))
            .transpose()
            .map_err(|e| S3Error::with_message(S3ErrorCode::InvalidRange, e))?;

        // GET source via S4 (handles decompression + sidecar partial fetch
        // when range is present). The result is the requested user-visible
        // byte range, fully decompressed. version_id is propagated so
        // pinned-version copies fetch the exact version requested.
        let mut get_input = GetObjectInput {
            bucket: src_bucket.clone(),
            key: src_key.clone(),
            version_id: src_version_id.clone(),
            ..Default::default()
        };
        get_input.range = source_range;
        let get_req = S3Request {
            input: get_input,
            method: http::Method::GET,
            uri: req.uri.clone(),
            headers: req.headers.clone(),
            extensions: http::Extensions::new(),
            credentials: req.credentials.clone(),
            region: req.region.clone(),
            service: req.service.clone(),
            trailing_headers: None,
        };
        let get_resp = self.get_object(get_req).await?;
        let blob = get_resp.output.body.ok_or_else(|| {
            S3Error::with_message(
                S3ErrorCode::InternalError,
                "upload_part_copy: empty body from source GET",
            )
        })?;
        // The whole (decompressed) range is collected in memory, bounded
        // by max_body_bytes — see the module header's known-limitation
        // note on non-streaming bodies.
        let bytes = collect_blob(blob, self.max_body_bytes)
            .await
            .map_err(internal("collect upload_part_copy source body"))?;

        // Compress + frame as a fresh part (mirrors upload_part path).
        // The dispatcher sniffs only the first SAMPLE_BYTES to pick a
        // codec, with the full length as a size hint (v0.8 #56).
        let sample_len = bytes.len().min(SAMPLE_BYTES);
        // v0.8 #56: same size-hint promotion as the upload_part path.
        let codec_kind = self
            .dispatcher
            .pick_with_size_hint(&bytes[..sample_len], Some(bytes.len() as u64))
            .await;
        let original_size = bytes.len() as u64;
        // v0.8 #55: telemetry-returning compress (GPU metrics stamp).
        let (compress_res, tel) = self
            .registry
            .compress_with_telemetry(bytes, codec_kind)
            .await;
        stamp_gpu_compress_telemetry(&tel);
        let (compressed, manifest) =
            compress_res.map_err(internal("registry compress upload_part_copy"))?;
        let header = FrameHeader {
            codec: codec_kind,
            original_size,
            compressed_size: compressed.len() as u64,
            crc32c: manifest.crc32c,
        };
        let mut framed = BytesMut::with_capacity(FRAME_HEADER_BYTES + compressed.len());
        write_frame(&mut framed, header, &compressed);
        // A source range below the S3 minimum-part floor is presumed to
        // be the trailing (final) part — S3 exempts the last part from
        // the minimum, so it is left unpadded. Anything larger is
        // padded up to the floor so the backend accepts it as a
        // non-final part even after compression shrinks it.
        let likely_final = original_size < S3_MULTIPART_MIN_PART_BYTES as u64;
        if !likely_final {
            pad_to_minimum(&mut framed, S3_MULTIPART_MIN_PART_BYTES);
        }
        let framed_bytes = framed.freeze();
        let framed_len = framed_bytes.len() as i64;

        // Forward as upload_part to the destination multipart upload.
        let part_input = UploadPartInput {
            bucket: req.input.bucket.clone(),
            key: req.input.key.clone(),
            part_number: req.input.part_number,
            upload_id: req.input.upload_id.clone(),
            body: Some(bytes_to_blob(framed_bytes)),
            content_length: Some(framed_len),
            ..Default::default()
        };
        let part_req = S3Request {
            input: part_input,
            method: http::Method::PUT,
            uri: req.uri.clone(),
            headers: req.headers.clone(),
            extensions: http::Extensions::new(),
            credentials: req.credentials.clone(),
            region: req.region.clone(),
            service: req.service.clone(),
            trailing_headers: None,
        };
        let upload_resp = self.backend.upload_part(part_req).await?;

        // Only the ETag is echoed back; `last_modified` in
        // CopyPartResult is left at its default. NOTE(review): AWS
        // populates LastModified here — confirm whether clients depend
        // on it.
        let copy_output = UploadPartCopyOutput {
            copy_part_result: Some(CopyPartResult {
                e_tag: upload_resp.output.e_tag.clone(),
                ..Default::default()
            }),
            ..Default::default()
        };
        Ok(S3Response::new(copy_output))
    }
4732
4733    // ---- Object lock / retention / legal hold (v0.5 #30) ----
4734    //
4735    // When an `ObjectLockManager` is attached the configuration / per-object
4736    // state lives in the manager and these handlers serve directly from it;
4737    // when no manager is attached they fall back to the backend (legacy
4738    // passthrough so v0.4 deployments are unaffected).
4739    async fn get_object_lock_configuration(
4740        &self,
4741        req: S3Request<GetObjectLockConfigurationInput>,
4742    ) -> S3Result<S3Response<GetObjectLockConfigurationOutput>> {
4743        if let Some(mgr) = self.object_lock.as_ref() {
4744            let cfg = mgr
4745                .bucket_default(&req.input.bucket)
4746                .map(|d| ObjectLockConfiguration {
4747                    object_lock_enabled: Some(ObjectLockEnabled::from_static(
4748                        ObjectLockEnabled::ENABLED,
4749                    )),
4750                    rule: Some(ObjectLockRule {
4751                        default_retention: Some(DefaultRetention {
4752                            days: Some(d.retention_days as i32),
4753                            mode: Some(ObjectLockRetentionMode::from_static(match d.mode {
4754                                crate::object_lock::LockMode::Governance => {
4755                                    ObjectLockRetentionMode::GOVERNANCE
4756                                }
4757                                crate::object_lock::LockMode::Compliance => {
4758                                    ObjectLockRetentionMode::COMPLIANCE
4759                                }
4760                            })),
4761                            years: None,
4762                        }),
4763                    }),
4764                });
4765            let output = GetObjectLockConfigurationOutput {
4766                object_lock_configuration: cfg,
4767            };
4768            return Ok(S3Response::new(output));
4769        }
4770        self.backend.get_object_lock_configuration(req).await
4771    }
4772    async fn put_object_lock_configuration(
4773        &self,
4774        req: S3Request<PutObjectLockConfigurationInput>,
4775    ) -> S3Result<S3Response<PutObjectLockConfigurationOutput>> {
4776        if let Some(mgr) = self.object_lock.as_ref() {
4777            let bucket = req.input.bucket.clone();
4778            if let Some(cfg) = req.input.object_lock_configuration.as_ref()
4779                && let Some(rule) = cfg.rule.as_ref()
4780                && let Some(d) = rule.default_retention.as_ref()
4781            {
4782                let mode = d
4783                    .mode
4784                    .as_ref()
4785                    .and_then(|m| crate::object_lock::LockMode::from_aws_str(m.as_str()))
4786                    .ok_or_else(|| {
4787                        S3Error::with_message(
4788                            S3ErrorCode::InvalidRequest,
4789                            "Object Lock default retention requires a valid Mode (GOVERNANCE | COMPLIANCE)",
4790                        )
4791                    })?;
4792                // S3 spec: exactly one of Days / Years (we accept Days
4793                // outright and convert Years → Days for storage; Years
4794                // is just a UX shorthand on the wire).
4795                let days: u32 = match (d.days, d.years) {
4796                    (Some(d), None) if d > 0 => d as u32,
4797                    (None, Some(y)) if y > 0 => (y as u32).saturating_mul(365),
4798                    _ => {
4799                        return Err(S3Error::with_message(
4800                            S3ErrorCode::InvalidRequest,
4801                            "Object Lock default retention requires exactly one of Days or Years (positive integer)",
4802                        ));
4803                    }
4804                };
4805                mgr.set_bucket_default(
4806                    &bucket,
4807                    crate::object_lock::BucketObjectLockDefault {
4808                        mode,
4809                        retention_days: days,
4810                    },
4811                );
4812            }
4813            return Ok(S3Response::new(PutObjectLockConfigurationOutput::default()));
4814        }
4815        self.backend.put_object_lock_configuration(req).await
4816    }
4817    async fn get_object_legal_hold(
4818        &self,
4819        req: S3Request<GetObjectLegalHoldInput>,
4820    ) -> S3Result<S3Response<GetObjectLegalHoldOutput>> {
4821        if let Some(mgr) = self.object_lock.as_ref() {
4822            let on = mgr
4823                .get(&req.input.bucket, &req.input.key)
4824                .map(|s| s.legal_hold_on)
4825                .unwrap_or(false);
4826            let status = ObjectLockLegalHoldStatus::from_static(if on {
4827                ObjectLockLegalHoldStatus::ON
4828            } else {
4829                ObjectLockLegalHoldStatus::OFF
4830            });
4831            let output = GetObjectLegalHoldOutput {
4832                legal_hold: Some(ObjectLockLegalHold {
4833                    status: Some(status),
4834                }),
4835            };
4836            return Ok(S3Response::new(output));
4837        }
4838        self.backend.get_object_legal_hold(req).await
4839    }
4840    async fn put_object_legal_hold(
4841        &self,
4842        req: S3Request<PutObjectLegalHoldInput>,
4843    ) -> S3Result<S3Response<PutObjectLegalHoldOutput>> {
4844        if let Some(mgr) = self.object_lock.as_ref() {
4845            let on = req
4846                .input
4847                .legal_hold
4848                .as_ref()
4849                .and_then(|h| h.status.as_ref())
4850                .map(|s| s.as_str().eq_ignore_ascii_case("ON"))
4851                .unwrap_or(false);
4852            mgr.set_legal_hold(&req.input.bucket, &req.input.key, on);
4853            return Ok(S3Response::new(PutObjectLegalHoldOutput::default()));
4854        }
4855        self.backend.put_object_legal_hold(req).await
4856    }
4857    async fn get_object_retention(
4858        &self,
4859        req: S3Request<GetObjectRetentionInput>,
4860    ) -> S3Result<S3Response<GetObjectRetentionOutput>> {
4861        if let Some(mgr) = self.object_lock.as_ref() {
4862            let retention = mgr
4863                .get(&req.input.bucket, &req.input.key)
4864                .filter(|s| s.mode.is_some() || s.retain_until.is_some())
4865                .map(|s| {
4866                    let mode = s.mode.map(|m| {
4867                        ObjectLockRetentionMode::from_static(match m {
4868                            crate::object_lock::LockMode::Governance => {
4869                                ObjectLockRetentionMode::GOVERNANCE
4870                            }
4871                            crate::object_lock::LockMode::Compliance => {
4872                                ObjectLockRetentionMode::COMPLIANCE
4873                            }
4874                        })
4875                    });
4876                    let until = s.retain_until.map(chrono_utc_to_timestamp);
4877                    ObjectLockRetention {
4878                        mode,
4879                        retain_until_date: until,
4880                    }
4881                });
4882            let output = GetObjectRetentionOutput { retention };
4883            return Ok(S3Response::new(output));
4884        }
4885        self.backend.get_object_retention(req).await
4886    }
    /// `PutObjectRetention` — served from the `ObjectLockManager` when
    /// attached, enforcing S3's retention-immutability rules; backend
    /// passthrough otherwise.
    async fn put_object_retention(
        &self,
        req: S3Request<PutObjectRetentionInput>,
    ) -> S3Result<S3Response<PutObjectRetentionOutput>> {
        if let Some(mgr) = self.object_lock.as_ref() {
            let bucket = req.input.bucket.clone();
            let key = req.input.key.clone();
            let bypass = req.input.bypass_governance_retention.unwrap_or(false);
            // A request without a Retention element is malformed.
            let retention = req.input.retention.as_ref().ok_or_else(|| {
                S3Error::with_message(
                    S3ErrorCode::InvalidRequest,
                    "PutObjectRetention requires a Retention element",
                )
            })?;
            // An unrecognised mode string silently maps to None here
            // (i.e. the existing mode is kept, see the partial-update
            // block below). NOTE(review): AWS rejects malformed modes —
            // confirm whether lenient parsing is intended.
            let new_mode = retention
                .mode
                .as_ref()
                .and_then(|m| crate::object_lock::LockMode::from_aws_str(m.as_str()));
            // `.map(..).unwrap_or(None)` flattens the Option<Option<..>>
            // produced by the fallible timestamp conversion: a missing
            // OR unconvertible RetainUntilDate both become None.
            let new_until = retention
                .retain_until_date
                .as_ref()
                .map(timestamp_to_chrono_utc)
                .unwrap_or(None);
            let now = chrono::Utc::now();
            let existing = mgr.get(&bucket, &key).unwrap_or_default();
            // S3 immutability rules:
            //   - Compliance is one-way: once set, mode cannot move to
            //     Governance, and retain-until cannot be shortened.
            //   - Governance can be lengthened freely; shortened only
            //     with bypass=true.
            if let Some(existing_mode) = existing.mode
                && existing_mode == crate::object_lock::LockMode::Compliance
                && existing.is_locked(now)
            {
                if matches!(new_mode, Some(crate::object_lock::LockMode::Governance)) {
                    return Err(S3Error::with_message(
                        S3ErrorCode::AccessDenied,
                        "Cannot downgrade Compliance retention to Governance while lock is active",
                    ));
                }
                if let (Some(prev), Some(next)) = (existing.retain_until, new_until)
                    && next < prev
                {
                    return Err(S3Error::with_message(
                        S3ErrorCode::AccessDenied,
                        "Cannot shorten Compliance retention while lock is active",
                    ));
                }
            }
            // Governance shortening is permitted only with the explicit
            // bypass header; lengthening needs no bypass.
            if let Some(existing_mode) = existing.mode
                && existing_mode == crate::object_lock::LockMode::Governance
                && existing.is_locked(now)
                && !bypass
                && let (Some(prev), Some(next)) = (existing.retain_until, new_until)
                && next < prev
            {
                return Err(S3Error::with_message(
                    S3ErrorCode::AccessDenied,
                    "Shortening Governance retention requires x-amz-bypass-governance-retention: true",
                ));
            }
            // Partial update: only the fields actually supplied
            // overwrite existing state (legal-hold flag is untouched).
            let mut state = existing;
            if new_mode.is_some() {
                state.mode = new_mode;
            }
            if new_until.is_some() {
                state.retain_until = new_until;
            }
            mgr.set(&bucket, &key, state);
            return Ok(S3Response::new(PutObjectRetentionOutput::default()));
        }
        self.backend.put_object_retention(req).await
    }
4960
4961    // ---- Versioning ----
4962    // list_object_versions is implemented above in the compression-hook
4963    // section so it filters S4-internal sidecars (v0.4 #17) AND, when a
4964    // VersioningManager is attached (v0.5 #34), serves chains directly
4965    // from the in-memory index.
4966    async fn get_bucket_versioning(
4967        &self,
4968        req: S3Request<GetBucketVersioningInput>,
4969    ) -> S3Result<S3Response<GetBucketVersioningOutput>> {
4970        // v0.5 #34: when a VersioningManager is attached, the bucket's
4971        // versioning state lives in the manager (= S4-server's
4972        // authoritative source). Pass-through hits the backend only
4973        // when no manager is configured (legacy v0.4 behaviour).
4974        if let Some(mgr) = self.versioning.as_ref() {
4975            let output = match mgr.state(&req.input.bucket).as_aws_status() {
4976                Some(s) => GetBucketVersioningOutput {
4977                    status: Some(BucketVersioningStatus::from(s.to_owned())),
4978                    ..Default::default()
4979                },
4980                None => GetBucketVersioningOutput::default(),
4981            };
4982            return Ok(S3Response::new(output));
4983        }
4984        self.backend.get_bucket_versioning(req).await
4985    }
    async fn put_bucket_versioning(
        &self,
        req: S3Request<PutBucketVersioningInput>,
    ) -> S3Result<S3Response<PutBucketVersioningOutput>> {
        // v0.6 #42: MFA gating on the `PutBucketVersioning` request
        // itself. S3 spec: when the request body carries an
        // `MfaDelete` element (either `Enabled` or `Disabled`), the
        // request must include a valid `x-amz-mfa` token — both for
        // the *first* enable (so the operator can't quietly side-step
        // the gate by never enabling it) and for any subsequent
        // change (so a leaked credential alone can't disable MFA
        // Delete to bypass it on subsequent DELETEs). Requests that
        // omit the `MfaDelete` element entirely (i.e. they flip only
        // `Status`) skip this gate, matching AWS.
        //
        // `target_enabled` maps any value other than a case-insensitive
        // "Enabled" (including "Disabled") to false.
        if let Some(mgr) = self.mfa_delete.as_ref()
            && let Some(target_enabled) = req
                .input
                .versioning_configuration
                .mfa_delete
                .as_ref()
                .map(|m| m.as_str().eq_ignore_ascii_case("Enabled"))
        {
            let bucket = req.input.bucket.clone();
            let header = req.input.mfa.as_deref();
            let secret = mgr.lookup_secret(&bucket);
            // Verification succeeds only when a header is present, a
            // secret is registered for the bucket, the header parses,
            // the serial matches, and the TOTP code checks out against
            // the current clock.
            let verified = match (header, secret.as_ref()) {
                (Some(h), Some(s)) => match crate::mfa::parse_mfa_header(h) {
                    Ok((serial, code)) => {
                        serial == s.serial
                            && crate::mfa::verify_totp(&s.secret_base32, &code, current_unix_secs())
                    }
                    Err(_) => false,
                },
                _ => false,
            };
            if !verified {
                crate::metrics::record_mfa_delete_denial(&bucket);
                // Distinguish "no header at all" from "header present
                // but invalid" in the error surfaced to the client.
                let err = if header.is_none() {
                    crate::mfa::MfaError::Missing
                } else {
                    crate::mfa::MfaError::InvalidCode
                };
                return Err(mfa_error_to_s3(err));
            }
            mgr.set_bucket_state(&bucket, target_enabled);
        }
        // v0.5 #34: when a VersioningManager is attached it is the
        // authoritative store — the new state is recorded in the
        // manager and the handler returns WITHOUT forwarding to the
        // backend. NOTE(review): an earlier comment here claimed the
        // request was also forwarded to the backend with rejection as a
        // soft-fail; the code below does not do that, so a
        // state-tracking backend can drift — confirm which behaviour is
        // intended. Unrecognised / absent Status strings map to
        // Unversioned.
        if let Some(mgr) = self.versioning.as_ref() {
            let new_state = match req
                .input
                .versioning_configuration
                .status
                .as_ref()
                .map(|s| s.as_str())
            {
                Some(s) if s.eq_ignore_ascii_case("Enabled") => {
                    crate::versioning::VersioningState::Enabled
                }
                Some(s) if s.eq_ignore_ascii_case("Suspended") => {
                    crate::versioning::VersioningState::Suspended
                }
                _ => crate::versioning::VersioningState::Unversioned,
            };
            mgr.set_state(&req.input.bucket, new_state);
            return Ok(S3Response::new(PutBucketVersioningOutput::default()));
        }
        self.backend.put_bucket_versioning(req).await
    }
5058
5059    // ---- Bucket location ----
5060    async fn get_bucket_location(
5061        &self,
5062        req: S3Request<GetBucketLocationInput>,
5063    ) -> S3Result<S3Response<GetBucketLocationOutput>> {
5064        self.backend.get_bucket_location(req).await
5065    }
5066
5067    // ---- Bucket policy ----
5068    async fn get_bucket_policy(
5069        &self,
5070        req: S3Request<GetBucketPolicyInput>,
5071    ) -> S3Result<S3Response<GetBucketPolicyOutput>> {
5072        self.backend.get_bucket_policy(req).await
5073    }
5074    async fn put_bucket_policy(
5075        &self,
5076        req: S3Request<PutBucketPolicyInput>,
5077    ) -> S3Result<S3Response<PutBucketPolicyOutput>> {
5078        self.backend.put_bucket_policy(req).await
5079    }
5080    async fn delete_bucket_policy(
5081        &self,
5082        req: S3Request<DeleteBucketPolicyInput>,
5083    ) -> S3Result<S3Response<DeleteBucketPolicyOutput>> {
5084        self.backend.delete_bucket_policy(req).await
5085    }
5086    async fn get_bucket_policy_status(
5087        &self,
5088        req: S3Request<GetBucketPolicyStatusInput>,
5089    ) -> S3Result<S3Response<GetBucketPolicyStatusOutput>> {
5090        self.backend.get_bucket_policy_status(req).await
5091    }
5092
5093    // ---- Bucket ACL ----
5094    async fn get_bucket_acl(
5095        &self,
5096        req: S3Request<GetBucketAclInput>,
5097    ) -> S3Result<S3Response<GetBucketAclOutput>> {
5098        self.backend.get_bucket_acl(req).await
5099    }
5100    async fn put_bucket_acl(
5101        &self,
5102        req: S3Request<PutBucketAclInput>,
5103    ) -> S3Result<S3Response<PutBucketAclOutput>> {
5104        self.backend.put_bucket_acl(req).await
5105    }
5106
5107    // ---- Bucket CORS (v0.6 #38) ----
5108    async fn get_bucket_cors(
5109        &self,
5110        req: S3Request<GetBucketCorsInput>,
5111    ) -> S3Result<S3Response<GetBucketCorsOutput>> {
5112        if let Some(mgr) = self.cors.as_ref() {
5113            let cfg = mgr.get(&req.input.bucket).ok_or_else(|| {
5114                S3Error::with_message(
5115                    S3ErrorCode::NoSuchCORSConfiguration,
5116                    "The CORS configuration does not exist".to_string(),
5117                )
5118            })?;
5119            let rules: Vec<CORSRule> = cfg
5120                .rules
5121                .into_iter()
5122                .map(|r| CORSRule {
5123                    allowed_headers: if r.allowed_headers.is_empty() {
5124                        None
5125                    } else {
5126                        Some(r.allowed_headers)
5127                    },
5128                    allowed_methods: r.allowed_methods,
5129                    allowed_origins: r.allowed_origins,
5130                    expose_headers: if r.expose_headers.is_empty() {
5131                        None
5132                    } else {
5133                        Some(r.expose_headers)
5134                    },
5135                    id: r.id,
5136                    max_age_seconds: r.max_age_seconds.map(|s| s as i32),
5137                })
5138                .collect();
5139            return Ok(S3Response::new(GetBucketCorsOutput {
5140                cors_rules: Some(rules),
5141            }));
5142        }
5143        self.backend.get_bucket_cors(req).await
5144    }
5145    async fn put_bucket_cors(
5146        &self,
5147        req: S3Request<PutBucketCorsInput>,
5148    ) -> S3Result<S3Response<PutBucketCorsOutput>> {
5149        if let Some(mgr) = self.cors.as_ref() {
5150            let cfg = crate::cors::CorsConfig {
5151                rules: req
5152                    .input
5153                    .cors_configuration
5154                    .cors_rules
5155                    .into_iter()
5156                    .map(|r| crate::cors::CorsRule {
5157                        allowed_origins: r.allowed_origins,
5158                        allowed_methods: r.allowed_methods,
5159                        allowed_headers: r.allowed_headers.unwrap_or_default(),
5160                        expose_headers: r.expose_headers.unwrap_or_default(),
5161                        max_age_seconds: r
5162                            .max_age_seconds
5163                            .and_then(|s| if s < 0 { None } else { Some(s as u32) }),
5164                        id: r.id,
5165                    })
5166                    .collect(),
5167            };
5168            mgr.put(&req.input.bucket, cfg);
5169            return Ok(S3Response::new(PutBucketCorsOutput::default()));
5170        }
5171        self.backend.put_bucket_cors(req).await
5172    }
5173    async fn delete_bucket_cors(
5174        &self,
5175        req: S3Request<DeleteBucketCorsInput>,
5176    ) -> S3Result<S3Response<DeleteBucketCorsOutput>> {
5177        if let Some(mgr) = self.cors.as_ref() {
5178            mgr.delete(&req.input.bucket);
5179            return Ok(S3Response::new(DeleteBucketCorsOutput::default()));
5180        }
5181        self.backend.delete_bucket_cors(req).await
5182    }
5183
5184    // ---- Bucket lifecycle (v0.6 #37) ----
5185    async fn get_bucket_lifecycle_configuration(
5186        &self,
5187        req: S3Request<GetBucketLifecycleConfigurationInput>,
5188    ) -> S3Result<S3Response<GetBucketLifecycleConfigurationOutput>> {
5189        if let Some(mgr) = self.lifecycle.as_ref() {
5190            let cfg = mgr.get(&req.input.bucket).ok_or_else(|| {
5191                S3Error::with_message(
5192                    S3ErrorCode::NoSuchLifecycleConfiguration,
5193                    "The lifecycle configuration does not exist".to_string(),
5194                )
5195            })?;
5196            let rules: Vec<LifecycleRule> = cfg.rules.iter().map(internal_rule_to_dto).collect();
5197            return Ok(S3Response::new(GetBucketLifecycleConfigurationOutput {
5198                rules: Some(rules),
5199                transition_default_minimum_object_size: None,
5200            }));
5201        }
5202        self.backend.get_bucket_lifecycle_configuration(req).await
5203    }
5204    async fn put_bucket_lifecycle_configuration(
5205        &self,
5206        req: S3Request<PutBucketLifecycleConfigurationInput>,
5207    ) -> S3Result<S3Response<PutBucketLifecycleConfigurationOutput>> {
5208        if let Some(mgr) = self.lifecycle.as_ref() {
5209            let bucket = req.input.bucket.clone();
5210            let dto_cfg = req.input.lifecycle_configuration.unwrap_or_default();
5211            let cfg = dto_lifecycle_to_internal(&dto_cfg);
5212            mgr.put(&bucket, cfg);
5213            return Ok(S3Response::new(
5214                PutBucketLifecycleConfigurationOutput::default(),
5215            ));
5216        }
5217        self.backend.put_bucket_lifecycle_configuration(req).await
5218    }
5219    async fn delete_bucket_lifecycle(
5220        &self,
5221        req: S3Request<DeleteBucketLifecycleInput>,
5222    ) -> S3Result<S3Response<DeleteBucketLifecycleOutput>> {
5223        if let Some(mgr) = self.lifecycle.as_ref() {
5224            mgr.delete(&req.input.bucket);
5225            return Ok(S3Response::new(DeleteBucketLifecycleOutput::default()));
5226        }
5227        self.backend.delete_bucket_lifecycle(req).await
5228    }
5229
5230    // ---- Bucket tagging (v0.6 #39) ----
5231    async fn get_bucket_tagging(
5232        &self,
5233        req: S3Request<GetBucketTaggingInput>,
5234    ) -> S3Result<S3Response<GetBucketTaggingOutput>> {
5235        let Some(mgr) = self.tagging.as_ref() else {
5236            return self.backend.get_bucket_tagging(req).await;
5237        };
5238        let tags = mgr.get_bucket_tags(&req.input.bucket).unwrap_or_default();
5239        Ok(S3Response::new(GetBucketTaggingOutput {
5240            tag_set: tagset_to_aws(&tags),
5241        }))
5242    }
5243    async fn put_bucket_tagging(
5244        &self,
5245        req: S3Request<PutBucketTaggingInput>,
5246    ) -> S3Result<S3Response<PutBucketTaggingOutput>> {
5247        let Some(mgr) = self.tagging.as_ref() else {
5248            return self.backend.put_bucket_tagging(req).await;
5249        };
5250        let bucket = req.input.bucket.clone();
5251        let parsed = aws_to_tagset(&req.input.tagging.tag_set)
5252            .map_err(|e| S3Error::with_message(S3ErrorCode::InvalidArgument, e.to_string()))?;
5253        self.enforce_policy(&req, "s3:PutBucketTagging", &bucket, None)?;
5254        mgr.put_bucket_tags(&bucket, parsed);
5255        Ok(S3Response::new(PutBucketTaggingOutput::default()))
5256    }
5257    async fn delete_bucket_tagging(
5258        &self,
5259        req: S3Request<DeleteBucketTaggingInput>,
5260    ) -> S3Result<S3Response<DeleteBucketTaggingOutput>> {
5261        let Some(mgr) = self.tagging.as_ref() else {
5262            return self.backend.delete_bucket_tagging(req).await;
5263        };
5264        let bucket = req.input.bucket.clone();
5265        self.enforce_policy(&req, "s3:PutBucketTagging", &bucket, None)?;
5266        mgr.delete_bucket_tags(&bucket);
5267        Ok(S3Response::new(DeleteBucketTaggingOutput::default()))
5268    }
5269
5270    // ---- Bucket encryption ----
5271    async fn get_bucket_encryption(
5272        &self,
5273        req: S3Request<GetBucketEncryptionInput>,
5274    ) -> S3Result<S3Response<GetBucketEncryptionOutput>> {
5275        self.backend.get_bucket_encryption(req).await
5276    }
5277    async fn put_bucket_encryption(
5278        &self,
5279        req: S3Request<PutBucketEncryptionInput>,
5280    ) -> S3Result<S3Response<PutBucketEncryptionOutput>> {
5281        self.backend.put_bucket_encryption(req).await
5282    }
5283    async fn delete_bucket_encryption(
5284        &self,
5285        req: S3Request<DeleteBucketEncryptionInput>,
5286    ) -> S3Result<S3Response<DeleteBucketEncryptionOutput>> {
5287        self.backend.delete_bucket_encryption(req).await
5288    }
5289
5290    // ---- Bucket logging ----
5291    async fn get_bucket_logging(
5292        &self,
5293        req: S3Request<GetBucketLoggingInput>,
5294    ) -> S3Result<S3Response<GetBucketLoggingOutput>> {
5295        self.backend.get_bucket_logging(req).await
5296    }
5297    async fn put_bucket_logging(
5298        &self,
5299        req: S3Request<PutBucketLoggingInput>,
5300    ) -> S3Result<S3Response<PutBucketLoggingOutput>> {
5301        self.backend.put_bucket_logging(req).await
5302    }
5303
5304    // ---- Bucket notification (v0.6 #35) ----
5305    //
5306    // When a `NotificationManager` is attached, S4 itself owns per-bucket
5307    // notification configurations and the PUT / GET handlers route through
5308    // the manager. The wire DTO's queue / topic configurations map onto
5309    // S4's `Destination::Sqs` / `Destination::Sns`; LambdaFunction and
5310    // EventBridge configurations are accepted on PUT but silently dropped
5311    // (out of scope for v0.6 #35). When no manager is attached the legacy
5312    // backend-passthrough behaviour applies.
5313    async fn get_bucket_notification_configuration(
5314        &self,
5315        req: S3Request<GetBucketNotificationConfigurationInput>,
5316    ) -> S3Result<S3Response<GetBucketNotificationConfigurationOutput>> {
5317        if let Some(mgr) = self.notifications.as_ref() {
5318            let cfg = mgr.get(&req.input.bucket).unwrap_or_default();
5319            let dto = notif_to_dto(&cfg);
5320            return Ok(S3Response::new(GetBucketNotificationConfigurationOutput {
5321                event_bridge_configuration: dto.event_bridge_configuration,
5322                lambda_function_configurations: dto.lambda_function_configurations,
5323                queue_configurations: dto.queue_configurations,
5324                topic_configurations: dto.topic_configurations,
5325            }));
5326        }
5327        self.backend
5328            .get_bucket_notification_configuration(req)
5329            .await
5330    }
5331    async fn put_bucket_notification_configuration(
5332        &self,
5333        req: S3Request<PutBucketNotificationConfigurationInput>,
5334    ) -> S3Result<S3Response<PutBucketNotificationConfigurationOutput>> {
5335        if let Some(mgr) = self.notifications.as_ref() {
5336            let cfg = notif_from_dto(&req.input.notification_configuration);
5337            mgr.put(&req.input.bucket, cfg);
5338            return Ok(S3Response::new(
5339                PutBucketNotificationConfigurationOutput::default(),
5340            ));
5341        }
5342        self.backend
5343            .put_bucket_notification_configuration(req)
5344            .await
5345    }
5346
5347    // ---- Bucket request payment ----
5348    async fn get_bucket_request_payment(
5349        &self,
5350        req: S3Request<GetBucketRequestPaymentInput>,
5351    ) -> S3Result<S3Response<GetBucketRequestPaymentOutput>> {
5352        self.backend.get_bucket_request_payment(req).await
5353    }
5354    async fn put_bucket_request_payment(
5355        &self,
5356        req: S3Request<PutBucketRequestPaymentInput>,
5357    ) -> S3Result<S3Response<PutBucketRequestPaymentOutput>> {
5358        self.backend.put_bucket_request_payment(req).await
5359    }
5360
5361    // ---- Bucket website ----
5362    async fn get_bucket_website(
5363        &self,
5364        req: S3Request<GetBucketWebsiteInput>,
5365    ) -> S3Result<S3Response<GetBucketWebsiteOutput>> {
5366        self.backend.get_bucket_website(req).await
5367    }
5368    async fn put_bucket_website(
5369        &self,
5370        req: S3Request<PutBucketWebsiteInput>,
5371    ) -> S3Result<S3Response<PutBucketWebsiteOutput>> {
5372        self.backend.put_bucket_website(req).await
5373    }
5374    async fn delete_bucket_website(
5375        &self,
5376        req: S3Request<DeleteBucketWebsiteInput>,
5377    ) -> S3Result<S3Response<DeleteBucketWebsiteOutput>> {
5378        self.backend.delete_bucket_website(req).await
5379    }
5380
5381    // ---- Bucket replication (v0.6 #40) ----
5382    async fn get_bucket_replication(
5383        &self,
5384        req: S3Request<GetBucketReplicationInput>,
5385    ) -> S3Result<S3Response<GetBucketReplicationOutput>> {
5386        if let Some(mgr) = self.replication.as_ref() {
5387            return match mgr.get(&req.input.bucket) {
5388                Some(cfg) => Ok(S3Response::new(GetBucketReplicationOutput {
5389                    replication_configuration: Some(replication_to_dto(&cfg)),
5390                })),
5391                None => Err(S3Error::with_message(
5392                    S3ErrorCode::Custom("ReplicationConfigurationNotFoundError".into()),
5393                    format!(
5394                        "no replication configuration on bucket {}",
5395                        req.input.bucket
5396                    ),
5397                )),
5398            };
5399        }
5400        self.backend.get_bucket_replication(req).await
5401    }
5402    async fn put_bucket_replication(
5403        &self,
5404        req: S3Request<PutBucketReplicationInput>,
5405    ) -> S3Result<S3Response<PutBucketReplicationOutput>> {
5406        if let Some(mgr) = self.replication.as_ref() {
5407            let cfg = replication_from_dto(&req.input.replication_configuration);
5408            mgr.put(&req.input.bucket, cfg);
5409            return Ok(S3Response::new(PutBucketReplicationOutput::default()));
5410        }
5411        self.backend.put_bucket_replication(req).await
5412    }
5413    async fn delete_bucket_replication(
5414        &self,
5415        req: S3Request<DeleteBucketReplicationInput>,
5416    ) -> S3Result<S3Response<DeleteBucketReplicationOutput>> {
5417        if let Some(mgr) = self.replication.as_ref() {
5418            mgr.delete(&req.input.bucket);
5419            return Ok(S3Response::new(DeleteBucketReplicationOutput::default()));
5420        }
5421        self.backend.delete_bucket_replication(req).await
5422    }
5423
5424    // ---- Bucket accelerate ----
5425    async fn get_bucket_accelerate_configuration(
5426        &self,
5427        req: S3Request<GetBucketAccelerateConfigurationInput>,
5428    ) -> S3Result<S3Response<GetBucketAccelerateConfigurationOutput>> {
5429        self.backend.get_bucket_accelerate_configuration(req).await
5430    }
5431    async fn put_bucket_accelerate_configuration(
5432        &self,
5433        req: S3Request<PutBucketAccelerateConfigurationInput>,
5434    ) -> S3Result<S3Response<PutBucketAccelerateConfigurationOutput>> {
5435        self.backend.put_bucket_accelerate_configuration(req).await
5436    }
5437
5438    // ---- Bucket ownership controls ----
5439    async fn get_bucket_ownership_controls(
5440        &self,
5441        req: S3Request<GetBucketOwnershipControlsInput>,
5442    ) -> S3Result<S3Response<GetBucketOwnershipControlsOutput>> {
5443        self.backend.get_bucket_ownership_controls(req).await
5444    }
5445    async fn put_bucket_ownership_controls(
5446        &self,
5447        req: S3Request<PutBucketOwnershipControlsInput>,
5448    ) -> S3Result<S3Response<PutBucketOwnershipControlsOutput>> {
5449        self.backend.put_bucket_ownership_controls(req).await
5450    }
5451    async fn delete_bucket_ownership_controls(
5452        &self,
5453        req: S3Request<DeleteBucketOwnershipControlsInput>,
5454    ) -> S3Result<S3Response<DeleteBucketOwnershipControlsOutput>> {
5455        self.backend.delete_bucket_ownership_controls(req).await
5456    }
5457
5458    // ---- Public access block ----
5459    async fn get_public_access_block(
5460        &self,
5461        req: S3Request<GetPublicAccessBlockInput>,
5462    ) -> S3Result<S3Response<GetPublicAccessBlockOutput>> {
5463        self.backend.get_public_access_block(req).await
5464    }
5465    async fn put_public_access_block(
5466        &self,
5467        req: S3Request<PutPublicAccessBlockInput>,
5468    ) -> S3Result<S3Response<PutPublicAccessBlockOutput>> {
5469        self.backend.put_public_access_block(req).await
5470    }
5471    async fn delete_public_access_block(
5472        &self,
5473        req: S3Request<DeletePublicAccessBlockInput>,
5474    ) -> S3Result<S3Response<DeletePublicAccessBlockOutput>> {
5475        self.backend.delete_public_access_block(req).await
5476    }
5477
5478    // ====================================================================
5479    // v0.6 #41: S3 Select — server-side SQL filter on object body.
5480    //
5481    // Fetch the object via the regular `get_object` path (so SSE-C /
5482    // SSE-S4 / SSE-KMS / S4 codec all decompress + decrypt transparently),
5483    // run a small SQL subset (CSV + JSON Lines, equality / inequality /
5484    // LIKE / AND / OR / NOT) over the in-memory body, and stream the
5485    // matched rows back as AWS event-stream `Records` + `Stats` + `End`
5486    // frames.
5487    //
5488    // Limitations (deliberate, documented):
5489    //   - Parquet input is rejected with NotImplemented.
5490    //   - Aggregates / GROUP BY / JOIN / ORDER BY / LIMIT are rejected at
5491    //     parse time as InvalidRequest (s3s 0.13 doesn't expose AWS's
5492    //     domain-specific `InvalidSqlExpression` code).
5493    //   - The body is fully buffered before SQL evaluation (S3 Select
5494    //     streaming-during-evaluation is v0.7 scope).
5495    //   - GPU-accelerated WHERE evaluation is stubbed out (always None).
    async fn select_object_content(
        &self,
        req: S3Request<SelectObjectContentInput>,
    ) -> S3Result<S3Response<SelectObjectContentOutput>> {
        use crate::select::{
            EventStreamWriter, SelectInputFormat, SelectOutputFormat, run_select_csv,
            run_select_jsonlines,
        };

        // Gate order matters: rate limit first, then a GetObject policy
        // check (Select reads the object, so it reuses the GET action).
        let select_bucket = req.input.bucket.clone();
        let select_key = req.input.key.clone();
        self.enforce_rate_limit(&req, &select_bucket)?;
        self.enforce_policy(&req, "s3:GetObject", &select_bucket, Some(&select_key))?;

        let request = req.input.request;
        let sql = request.expression.clone();
        // Only the "SQL" expression type exists in the S3 API today, but
        // the field is open-ended on the wire, so validate it explicitly.
        if request.expression_type.as_str() != "SQL" {
            return Err(S3Error::with_message(
                S3ErrorCode::InvalidExpressionType,
                format!(
                    "ExpressionType must be SQL, got: {}",
                    request.expression_type.as_str()
                ),
            ));
        }

        // NOTE(review): JSON wins if a request supplies *both* JSON and CSV
        // input serialization — exclusivity is not enforced here even though
        // the fallthrough error text says "exactly one"; confirm intent.
        let input_format = if let Some(_json) = request.input_serialization.json.as_ref() {
            SelectInputFormat::JsonLines
        } else if let Some(csv) = request.input_serialization.csv.as_ref() {
            // Header rows are consumed for both USE and IGNORE; only the
            // (default) NONE setting treats row 1 as data.
            let has_header = csv
                .file_header_info
                .as_ref()
                .map(|h| {
                    let s = h.as_str();
                    s.eq_ignore_ascii_case("USE") || s.eq_ignore_ascii_case("IGNORE")
                })
                .unwrap_or(false);
            // Only the first character of FieldDelimiter is honoured;
            // a missing delimiter defaults to a comma.
            let delim = csv
                .field_delimiter
                .as_deref()
                .and_then(|s| s.chars().next())
                .unwrap_or(',');
            SelectInputFormat::Csv {
                has_header,
                delimiter: delim,
            }
        } else if request.input_serialization.parquet.is_some() {
            return Err(S3Error::with_message(
                S3ErrorCode::NotImplemented,
                "Parquet input is not supported by this S3 Select implementation (v0.6: CSV / JSON Lines only)",
            ));
        } else {
            return Err(S3Error::with_message(
                S3ErrorCode::InvalidRequest,
                "InputSerialization requires exactly one of CSV / JSON / Parquet",
            ));
        };
        // GZIP/BZIP2 input compression is out of scope for v0.6; an
        // explicit CompressionType other than NONE is rejected loudly.
        if let Some(ct) = request.input_serialization.compression_type.as_ref()
            && !ct.as_str().eq_ignore_ascii_case("NONE")
        {
            return Err(S3Error::with_message(
                S3ErrorCode::NotImplemented,
                format!(
                    "InputSerialization CompressionType={} is not supported (v0.6: NONE only)",
                    ct.as_str()
                ),
            ));
        }

        // Same precedence pattern for the output side: JSON, then CSV.
        let output_format = if request.output_serialization.json.is_some() {
            SelectOutputFormat::Json
        } else if request.output_serialization.csv.is_some() {
            SelectOutputFormat::Csv
        } else {
            return Err(S3Error::with_message(
                S3ErrorCode::InvalidRequest,
                "OutputSerialization requires exactly one of CSV / JSON",
            ));
        };

        // Fetch the object through our own `get_object` so SSE-C headers
        // are honoured and the S4 codec path decompresses transparently.
        let get_input = GetObjectInput {
            bucket: select_bucket.clone(),
            key: select_key.clone(),
            sse_customer_algorithm: req.input.sse_customer_algorithm.clone(),
            sse_customer_key: req.input.sse_customer_key.clone(),
            sse_customer_key_md5: req.input.sse_customer_key_md5.clone(),
            ..Default::default()
        };
        // A synthetic inner request: fresh headers/extensions, but the
        // caller's credentials/region/service carry over so the inner GET
        // is authorised as the same principal.
        let get_req = S3Request {
            input: get_input,
            method: http::Method::GET,
            uri: format!("/{}/{}", select_bucket, select_key)
                .parse()
                .map_err(|e| {
                    S3Error::with_message(
                        S3ErrorCode::InternalError,
                        format!("constructing inner GET URI: {e}"),
                    )
                })?,
            headers: http::HeaderMap::new(),
            extensions: http::Extensions::new(),
            credentials: req.credentials.clone(),
            region: req.region.clone(),
            service: req.service.clone(),
            trailing_headers: None,
        };
        let mut get_resp = self.get_object(get_req).await?;
        let blob = get_resp.output.body.take().ok_or_else(|| {
            S3Error::with_message(
                S3ErrorCode::InternalError,
                "Select: object body was empty after GET",
            )
        })?;
        // Fully buffered evaluation (documented limitation); capped by the
        // same max_body_bytes bound as PUT collection.
        let body_bytes = crate::blob::collect_blob(blob, self.max_body_bytes)
            .await
            .map_err(internal("collect Select body"))?;
        let scanned = body_bytes.len() as u64;

        let matched_payload = match input_format {
            SelectInputFormat::JsonLines => run_select_jsonlines(&sql, &body_bytes, output_format)
                .map_err(|e| select_error_to_s3(e, "JSON Lines"))?,
            SelectInputFormat::Csv { .. } => {
                run_select_csv(&sql, &body_bytes, input_format, output_format)
                    .map_err(|e| select_error_to_s3(e, "CSV"))?
            }
        };

        // Stats: with uncompressed, fully-buffered input, scanned ==
        // processed (they diverge only for compressed input, v0.7 scope).
        let returned = matched_payload.len() as u64;
        let processed = scanned;
        let mut events: Vec<S3Result<SelectObjectContentEvent>> = Vec::with_capacity(3);
        // A Records frame is emitted only when at least one row matched,
        // mirroring AWS (no empty Records events).
        if !matched_payload.is_empty() {
            events.push(Ok(SelectObjectContentEvent::Records(RecordsEvent {
                payload: Some(bytes::Bytes::from(matched_payload)),
            })));
        }
        events.push(Ok(SelectObjectContentEvent::Stats(StatsEvent {
            details: Some(Stats {
                bytes_scanned: Some(scanned as i64),
                bytes_processed: Some(processed as i64),
                bytes_returned: Some(returned as i64),
            }),
        })));
        events.push(Ok(SelectObjectContentEvent::End(EndEvent {})));
        // Touch EventStreamWriter so the public API stays linked into the
        // build (the actual wire framing is delegated to s3s).
        let _writer = EventStreamWriter::new();

        let stream = SelectObjectContentEventStream::new(futures::stream::iter(events));
        let output = SelectObjectContentOutput {
            payload: Some(stream),
        };
        Ok(S3Response::new(output))
    }
5649
5650    // ---- Bucket Inventory configuration (v0.6 #36) ----
5651    //
5652    // When an `InventoryManager` is attached, S4-server owns the
5653    // configuration store and these handlers no longer pass through to
5654    // the backend. The mapping between the s3s-typed
5655    // `InventoryConfiguration` and the inventory module's internal
5656    // `InventoryConfig` is intentionally lossy: only the fields S4
5657    // actually uses for periodic CSV emission survive the round trip
5658    // (id, source bucket, destination bucket / prefix, format, included
5659    // versions, schedule frequency). Optional fields, encryption, and
5660    // filter prefixes are accepted on PUT and re-surfaced on GET via
5661    // a best-effort default-shape `InventoryConfiguration` so the
5662    // client sees a roundtrip-clean response.
5663    async fn put_bucket_inventory_configuration(
5664        &self,
5665        req: S3Request<PutBucketInventoryConfigurationInput>,
5666    ) -> S3Result<S3Response<PutBucketInventoryConfigurationOutput>> {
5667        if let Some(mgr) = self.inventory.as_ref() {
5668            let cfg = inv_from_dto(
5669                &req.input.bucket,
5670                &req.input.id,
5671                &req.input.inventory_configuration,
5672            );
5673            mgr.put(cfg);
5674            return Ok(S3Response::new(
5675                PutBucketInventoryConfigurationOutput::default(),
5676            ));
5677        }
5678        self.backend.put_bucket_inventory_configuration(req).await
5679    }
5680
5681    async fn get_bucket_inventory_configuration(
5682        &self,
5683        req: S3Request<GetBucketInventoryConfigurationInput>,
5684    ) -> S3Result<S3Response<GetBucketInventoryConfigurationOutput>> {
5685        if let Some(mgr) = self.inventory.as_ref() {
5686            let cfg = mgr.get(&req.input.bucket, &req.input.id);
5687            if let Some(cfg) = cfg {
5688                let out = GetBucketInventoryConfigurationOutput {
5689                    inventory_configuration: Some(inv_to_dto(&cfg)),
5690                };
5691                return Ok(S3Response::new(out));
5692            }
5693            // AWS returns `NoSuchConfiguration` (404) when the id has no
5694            // matching inventory configuration on the bucket. The
5695            // generated `S3ErrorCode` enum doesn't expose a typed variant
5696            // for this code, so we round-trip through `from_bytes` which
5697            // wraps unknown codes as `Custom(...)` (= the AWS-canonical
5698            // error-code string survives into the XML response envelope).
5699            let code =
5700                S3ErrorCode::from_bytes(b"NoSuchConfiguration").unwrap_or(S3ErrorCode::NoSuchKey);
5701            return Err(S3Error::with_message(
5702                code,
5703                format!(
5704                    "no inventory configuration with id={} on bucket={}",
5705                    req.input.id, req.input.bucket
5706                ),
5707            ));
5708        }
5709        self.backend.get_bucket_inventory_configuration(req).await
5710    }
5711
5712    async fn list_bucket_inventory_configurations(
5713        &self,
5714        req: S3Request<ListBucketInventoryConfigurationsInput>,
5715    ) -> S3Result<S3Response<ListBucketInventoryConfigurationsOutput>> {
5716        if let Some(mgr) = self.inventory.as_ref() {
5717            let list = mgr.list_for_bucket(&req.input.bucket);
5718            let dto_list: Vec<InventoryConfiguration> = list.iter().map(inv_to_dto).collect();
5719            let out = ListBucketInventoryConfigurationsOutput {
5720                continuation_token: req.input.continuation_token.clone(),
5721                inventory_configuration_list: if dto_list.is_empty() {
5722                    None
5723                } else {
5724                    Some(dto_list)
5725                },
5726                is_truncated: Some(false),
5727                next_continuation_token: None,
5728            };
5729            return Ok(S3Response::new(out));
5730        }
5731        self.backend.list_bucket_inventory_configurations(req).await
5732    }
5733
5734    async fn delete_bucket_inventory_configuration(
5735        &self,
5736        req: S3Request<DeleteBucketInventoryConfigurationInput>,
5737    ) -> S3Result<S3Response<DeleteBucketInventoryConfigurationOutput>> {
5738        if let Some(mgr) = self.inventory.as_ref() {
5739            mgr.delete(&req.input.bucket, &req.input.id);
5740            return Ok(S3Response::new(
5741                DeleteBucketInventoryConfigurationOutput::default(),
5742            ));
5743        }
5744        self.backend
5745            .delete_bucket_inventory_configuration(req)
5746            .await
5747    }
5748}
5749
5750// ---------------------------------------------------------------------------
5751// v0.6 #36: Convert between the s3s-typed `InventoryConfiguration` (the wire
5752// surface) and our internal `crate::inventory::InventoryConfig`. Only the
5753// fields S4 actually uses for CSV emission survive the round trip; the
5754// missing fields (filter prefix, optional fields, encryption) are dropped on
5755// PUT and re-rendered as the AWS-default shape on GET so the client sees a
5756// well-formed `InventoryConfiguration`.
5757// ---------------------------------------------------------------------------
5758
5759fn inv_from_dto(
5760    bucket: &str,
5761    id: &str,
5762    dto: &InventoryConfiguration,
5763) -> crate::inventory::InventoryConfig {
5764    let frequency_hours = match dto.schedule.frequency.as_str() {
5765        "Weekly" => 24 * 7,
5766        // Daily is the default; anything S4 doesn't recognise (incl.
5767        // empty, which is the s3s-default) maps to Daily so the
5768        // operator's PUT doesn't silently turn into a no-op cadence.
5769        _ => 24,
5770    };
5771    // Parquet/ORC are not supported (issue #36 scope); we still accept
5772    // the PUT so callers don't fail-loud, but we record CSV and rely on
5773    // the operator catching the discrepancy on GET.
5774    let format = crate::inventory::InventoryFormat::Csv;
5775    crate::inventory::InventoryConfig {
5776        id: id.to_owned(),
5777        bucket: bucket.to_owned(),
5778        destination_bucket: dto.destination.s3_bucket_destination.bucket.clone(),
5779        destination_prefix: dto
5780            .destination
5781            .s3_bucket_destination
5782            .prefix
5783            .clone()
5784            .unwrap_or_default(),
5785        frequency_hours,
5786        format,
5787        included_object_versions: crate::inventory::IncludedVersions::from_aws_str(
5788            dto.included_object_versions.as_str(),
5789        ),
5790    }
5791}
5792
5793fn inv_to_dto(cfg: &crate::inventory::InventoryConfig) -> InventoryConfiguration {
5794    InventoryConfiguration {
5795        id: cfg.id.clone(),
5796        is_enabled: true,
5797        included_object_versions: InventoryIncludedObjectVersions::from(
5798            cfg.included_object_versions.as_aws_str().to_owned(),
5799        ),
5800        destination: InventoryDestination {
5801            s3_bucket_destination: InventoryS3BucketDestination {
5802                account_id: None,
5803                bucket: cfg.destination_bucket.clone(),
5804                encryption: None,
5805                format: InventoryFormat::from(cfg.format.as_aws_str().to_owned()),
5806                prefix: if cfg.destination_prefix.is_empty() {
5807                    None
5808                } else {
5809                    Some(cfg.destination_prefix.clone())
5810                },
5811            },
5812        },
5813        schedule: InventorySchedule {
5814            // `frequency_hours == 168` -> Weekly; everything else maps to
5815            // Daily for the wire response (the manager keeps the precise
5816            // hour count internally for due-checking).
5817            frequency: InventoryFrequency::from(
5818                if cfg.frequency_hours == 24 * 7 {
5819                    "Weekly"
5820                } else {
5821                    "Daily"
5822                }
5823                .to_owned(),
5824            ),
5825        },
5826        filter: None,
5827        optional_fields: None,
5828    }
5829}
5830
5831// ---------------------------------------------------------------------------
5832// v0.6 #35: Convert between the s3s-typed `NotificationConfiguration` (the
5833// wire surface) and our internal `crate::notifications::NotificationConfig`.
5834//
5835// We support TopicConfiguration (-> Destination::Sns) and QueueConfiguration
5836// (-> Destination::Sqs). LambdaFunction and EventBridge configurations are
5837// silently dropped on PUT (out of scope for v0.6 #35); the GET response only
5838// surfaces topic / queue rules.
5839//
5840// The webhook destination has no AWS-native wire form: operators configure
5841// webhooks via the JSON snapshot file (`--notifications-state-file`) or by
5842// poking `NotificationManager::put` directly from a custom binary. This
5843// keeps the wire surface AWS-compatible while still letting the always-
5844// available `Webhook` destination be reachable.
5845// ---------------------------------------------------------------------------
5846
5847fn notif_from_dto(dto: &NotificationConfiguration) -> crate::notifications::NotificationConfig {
5848    let mut rules: Vec<crate::notifications::NotificationRule> = Vec::new();
5849    if let Some(topics) = dto.topic_configurations.as_ref() {
5850        for (idx, t) in topics.iter().enumerate() {
5851            let events = events_from_dto(&t.events);
5852            let (prefix, suffix) = filter_from_dto(t.filter.as_ref());
5853            rules.push(crate::notifications::NotificationRule {
5854                id: t.id.clone().unwrap_or_else(|| format!("topic-{idx}")),
5855                events,
5856                destination: crate::notifications::Destination::Sns {
5857                    topic_arn: t.topic_arn.clone(),
5858                },
5859                filter_prefix: prefix,
5860                filter_suffix: suffix,
5861            });
5862        }
5863    }
5864    if let Some(queues) = dto.queue_configurations.as_ref() {
5865        for (idx, q) in queues.iter().enumerate() {
5866            let events = events_from_dto(&q.events);
5867            let (prefix, suffix) = filter_from_dto(q.filter.as_ref());
5868            rules.push(crate::notifications::NotificationRule {
5869                id: q.id.clone().unwrap_or_else(|| format!("queue-{idx}")),
5870                events,
5871                destination: crate::notifications::Destination::Sqs {
5872                    queue_arn: q.queue_arn.clone(),
5873                },
5874                filter_prefix: prefix,
5875                filter_suffix: suffix,
5876            });
5877        }
5878    }
5879    crate::notifications::NotificationConfig { rules }
5880}
5881
5882fn notif_to_dto(cfg: &crate::notifications::NotificationConfig) -> NotificationConfiguration {
5883    let mut topics: Vec<TopicConfiguration> = Vec::new();
5884    let mut queues: Vec<QueueConfiguration> = Vec::new();
5885    for rule in &cfg.rules {
5886        let events: Vec<Event> = rule
5887            .events
5888            .iter()
5889            .map(|e| Event::from(e.as_aws_str().to_owned()))
5890            .collect();
5891        let filter = filter_to_dto(rule.filter_prefix.as_deref(), rule.filter_suffix.as_deref());
5892        match &rule.destination {
5893            crate::notifications::Destination::Sns { topic_arn } => {
5894                topics.push(TopicConfiguration {
5895                    events,
5896                    filter,
5897                    id: Some(rule.id.clone()),
5898                    topic_arn: topic_arn.clone(),
5899                });
5900            }
5901            crate::notifications::Destination::Sqs { queue_arn } => {
5902                queues.push(QueueConfiguration {
5903                    events,
5904                    filter,
5905                    id: Some(rule.id.clone()),
5906                    queue_arn: queue_arn.clone(),
5907                });
5908            }
5909            // Webhook destinations have no AWS wire equivalent — they
5910            // round-trip through the JSON snapshot only. Skip them on the
5911            // GET surface (an SDK consumer wouldn't know what to do with
5912            // them anyway).
5913            crate::notifications::Destination::Webhook { .. } => {}
5914        }
5915    }
5916    NotificationConfiguration {
5917        event_bridge_configuration: None,
5918        lambda_function_configurations: None,
5919        queue_configurations: if queues.is_empty() {
5920            None
5921        } else {
5922            Some(queues)
5923        },
5924        topic_configurations: if topics.is_empty() {
5925            None
5926        } else {
5927            Some(topics)
5928        },
5929    }
5930}
5931
5932fn events_from_dto(events: &[Event]) -> Vec<crate::notifications::EventType> {
5933    events
5934        .iter()
5935        .filter_map(|e| crate::notifications::EventType::from_aws_str(e.as_ref()))
5936        .collect()
5937}
5938
5939fn filter_from_dto(
5940    f: Option<&NotificationConfigurationFilter>,
5941) -> (Option<String>, Option<String>) {
5942    let Some(f) = f else {
5943        return (None, None);
5944    };
5945    let Some(key) = f.key.as_ref() else {
5946        return (None, None);
5947    };
5948    let Some(rules) = key.filter_rules.as_ref() else {
5949        return (None, None);
5950    };
5951    let mut prefix = None;
5952    let mut suffix = None;
5953    for r in rules {
5954        let name = r.name.as_ref().map(|n| n.as_str().to_ascii_lowercase());
5955        let value = r.value.clone();
5956        match name.as_deref() {
5957            Some("prefix") => prefix = value,
5958            Some("suffix") => suffix = value,
5959            _ => {}
5960        }
5961    }
5962    (prefix, suffix)
5963}
5964
5965fn filter_to_dto(
5966    prefix: Option<&str>,
5967    suffix: Option<&str>,
5968) -> Option<NotificationConfigurationFilter> {
5969    if prefix.is_none() && suffix.is_none() {
5970        return None;
5971    }
5972    let mut rules: Vec<FilterRule> = Vec::new();
5973    if let Some(p) = prefix {
5974        rules.push(FilterRule {
5975            name: Some(FilterRuleName::from("prefix".to_owned())),
5976            value: Some(p.to_owned()),
5977        });
5978    }
5979    if let Some(s) = suffix {
5980        rules.push(FilterRule {
5981            name: Some(FilterRuleName::from("suffix".to_owned())),
5982            value: Some(s.to_owned()),
5983        });
5984    }
5985    Some(NotificationConfigurationFilter {
5986        key: Some(S3KeyFilter {
5987            filter_rules: Some(rules),
5988        }),
5989    })
5990}
5991
5992// ---------------------------------------------------------------------------
5993// v0.6 #40: Convert between the s3s-typed `ReplicationConfiguration` (the
5994// wire surface) and our internal `crate::replication::ReplicationConfig`.
5995// AWS's `ReplicationRuleFilter` is a sum type — `Prefix | Tag | And { Prefix,
5996// Tags }`; we flatten it into the single `(prefix, tag-vec)` representation
5997// the matcher needs. Sub-blocks v0.6 #40 does not implement
5998// (DeleteMarkerReplication / SourceSelectionCriteria / ReplicationTime /
5999// Metrics / EncryptionConfiguration) round-trip as `None` on GET — operators
6000// who set them on PUT see them silently dropped, mirroring "feature not
6001// supported in this release" semantics.
6002// ---------------------------------------------------------------------------
6003
6004fn replication_from_dto(dto: &ReplicationConfiguration) -> crate::replication::ReplicationConfig {
6005    let rules = dto
6006        .rules
6007        .iter()
6008        .enumerate()
6009        .map(|(idx, r)| {
6010            let id =
6011                r.id.as_ref()
6012                    .map(|s| s.as_str().to_owned())
6013                    .unwrap_or_else(|| format!("rule-{idx}"));
6014            let priority = r.priority.unwrap_or(0).max(0) as u32;
6015            let status_enabled = r.status.as_str() == ReplicationRuleStatus::ENABLED;
6016            let filter = replication_filter_from_dto(r.filter.as_ref(), r.prefix.as_deref());
6017            let destination_bucket = r.destination.bucket.clone();
6018            let destination_storage_class = r
6019                .destination
6020                .storage_class
6021                .as_ref()
6022                .map(|s| s.as_str().to_owned());
6023            crate::replication::ReplicationRule {
6024                id,
6025                priority,
6026                status_enabled,
6027                filter,
6028                destination_bucket,
6029                destination_storage_class,
6030            }
6031        })
6032        .collect();
6033    crate::replication::ReplicationConfig {
6034        role: dto.role.clone(),
6035        rules,
6036    }
6037}
6038
6039fn replication_to_dto(cfg: &crate::replication::ReplicationConfig) -> ReplicationConfiguration {
6040    let rules = cfg
6041        .rules
6042        .iter()
6043        .map(|r| {
6044            let status = if r.status_enabled {
6045                ReplicationRuleStatus::from_static(ReplicationRuleStatus::ENABLED)
6046            } else {
6047                ReplicationRuleStatus::from_static(ReplicationRuleStatus::DISABLED)
6048            };
6049            let destination = Destination {
6050                access_control_translation: None,
6051                account: None,
6052                bucket: r.destination_bucket.clone(),
6053                encryption_configuration: None,
6054                metrics: None,
6055                replication_time: None,
6056                storage_class: r
6057                    .destination_storage_class
6058                    .as_ref()
6059                    .map(|s| StorageClass::from(s.clone())),
6060            };
6061            let filter = Some(replication_filter_to_dto(&r.filter));
6062            ReplicationRule {
6063                delete_marker_replication: None,
6064                destination,
6065                existing_object_replication: None,
6066                filter,
6067                id: Some(r.id.clone()),
6068                prefix: None,
6069                priority: Some(r.priority as i32),
6070                source_selection_criteria: None,
6071                status,
6072            }
6073        })
6074        .collect();
6075    ReplicationConfiguration {
6076        role: cfg.role.clone(),
6077        rules,
6078    }
6079}
6080
6081fn replication_filter_from_dto(
6082    f: Option<&ReplicationRuleFilter>,
6083    rule_level_prefix: Option<&str>,
6084) -> crate::replication::ReplicationFilter {
6085    let mut prefix: Option<String> = rule_level_prefix.map(str::to_owned);
6086    let mut tags: Vec<(String, String)> = Vec::new();
6087    if let Some(f) = f {
6088        if let Some(p) = f.prefix.as_ref()
6089            && prefix.is_none()
6090        {
6091            prefix = Some(p.clone());
6092        }
6093        if let Some(t) = f.tag.as_ref()
6094            && let (Some(k), Some(v)) = (t.key.as_ref(), t.value.as_ref())
6095        {
6096            tags.push((k.clone(), v.clone()));
6097        }
6098        if let Some(and) = f.and.as_ref() {
6099            if let Some(p) = and.prefix.as_ref()
6100                && prefix.is_none()
6101            {
6102                prefix = Some(p.clone());
6103            }
6104            if let Some(ts) = and.tags.as_ref() {
6105                for t in ts {
6106                    if let (Some(k), Some(v)) = (t.key.as_ref(), t.value.as_ref()) {
6107                        tags.push((k.clone(), v.clone()));
6108                    }
6109                }
6110            }
6111        }
6112    }
6113    crate::replication::ReplicationFilter { prefix, tags }
6114}
6115
6116fn replication_filter_to_dto(f: &crate::replication::ReplicationFilter) -> ReplicationRuleFilter {
6117    if f.tags.is_empty() {
6118        ReplicationRuleFilter {
6119            and: None,
6120            prefix: f.prefix.clone(),
6121            tag: None,
6122        }
6123    } else if f.tags.len() == 1 && f.prefix.is_none() {
6124        let (k, v) = &f.tags[0];
6125        ReplicationRuleFilter {
6126            and: None,
6127            prefix: None,
6128            tag: Some(Tag {
6129                key: Some(k.clone()),
6130                value: Some(v.clone()),
6131            }),
6132        }
6133    } else {
6134        let tags: Vec<Tag> = f
6135            .tags
6136            .iter()
6137            .map(|(k, v)| Tag {
6138                key: Some(k.clone()),
6139                value: Some(v.clone()),
6140            })
6141            .collect();
6142        ReplicationRuleFilter {
6143            and: Some(ReplicationRuleAndOperator {
6144                prefix: f.prefix.clone(),
6145                tags: Some(tags),
6146            }),
6147            prefix: None,
6148            tag: None,
6149        }
6150    }
6151}
6152
6153// ---------------------------------------------------------------------------
6154// v0.6 #37: Convert between the s3s-typed `BucketLifecycleConfiguration`
6155// (the wire surface) and our internal `crate::lifecycle::LifecycleConfig`.
6156// The internal representation flattens AWS's "Filter | And" disjunction
6157// into a single `LifecycleFilter` struct of optional fields plus a tag
6158// vector. Fields S4's evaluator does not consume
6159// (`expired_object_delete_marker`, `noncurrent_version_transitions`,
6160// `transition_default_minimum_object_size`, the storage class on the
6161// noncurrent expiration) are dropped on PUT and re-rendered as their
6162// AWS-default shape on GET so the client always sees a well-formed
6163// configuration.
6164// ---------------------------------------------------------------------------
6165
6166fn dto_lifecycle_to_internal(
6167    dto: &BucketLifecycleConfiguration,
6168) -> crate::lifecycle::LifecycleConfig {
6169    crate::lifecycle::LifecycleConfig {
6170        rules: dto.rules.iter().map(dto_rule_to_internal).collect(),
6171    }
6172}
6173
6174fn dto_rule_to_internal(rule: &LifecycleRule) -> crate::lifecycle::LifecycleRule {
6175    let status = crate::lifecycle::LifecycleStatus::from_aws_str(rule.status.as_str());
6176    let filter = rule
6177        .filter
6178        .as_ref()
6179        .map(dto_filter_to_internal)
6180        .unwrap_or_default();
6181    let expiration_days = rule
6182        .expiration
6183        .as_ref()
6184        .and_then(|e| e.days)
6185        .and_then(|d| u32::try_from(d).ok());
6186    let expiration_date = rule
6187        .expiration
6188        .as_ref()
6189        .and_then(|e| e.date.as_ref())
6190        .and_then(timestamp_to_chrono_utc);
6191    let transitions: Vec<crate::lifecycle::TransitionRule> = rule
6192        .transitions
6193        .as_ref()
6194        .map(|ts| {
6195            ts.iter()
6196                .filter_map(|t| {
6197                    let days = u32::try_from(t.days?).ok()?;
6198                    let storage_class = t.storage_class.as_ref()?.as_str().to_owned();
6199                    Some(crate::lifecycle::TransitionRule {
6200                        days,
6201                        storage_class,
6202                    })
6203                })
6204                .collect()
6205        })
6206        .unwrap_or_default();
6207    let noncurrent_version_expiration_days = rule
6208        .noncurrent_version_expiration
6209        .as_ref()
6210        .and_then(|n| n.noncurrent_days)
6211        .and_then(|d| u32::try_from(d).ok());
6212    let abort_incomplete_multipart_upload_days = rule
6213        .abort_incomplete_multipart_upload
6214        .as_ref()
6215        .and_then(|a| a.days_after_initiation)
6216        .and_then(|d| u32::try_from(d).ok());
6217    crate::lifecycle::LifecycleRule {
6218        id: rule.id.clone().unwrap_or_default(),
6219        status,
6220        filter,
6221        expiration_days,
6222        expiration_date,
6223        transitions,
6224        noncurrent_version_expiration_days,
6225        abort_incomplete_multipart_upload_days,
6226    }
6227}
6228
6229fn dto_filter_to_internal(filter: &LifecycleRuleFilter) -> crate::lifecycle::LifecycleFilter {
6230    let mut prefix = filter.prefix.clone();
6231    let mut tags: Vec<(String, String)> = Vec::new();
6232    let mut size_gt: Option<u64> = filter
6233        .object_size_greater_than
6234        .and_then(|n| u64::try_from(n).ok());
6235    let mut size_lt: Option<u64> = filter
6236        .object_size_less_than
6237        .and_then(|n| u64::try_from(n).ok());
6238    if let Some(t) = &filter.tag
6239        && let (Some(k), Some(v)) = (t.key.as_ref(), t.value.as_ref())
6240    {
6241        tags.push((k.clone(), v.clone()));
6242    }
6243    if let Some(and) = &filter.and {
6244        if prefix.is_none() {
6245            prefix = and.prefix.clone();
6246        }
6247        if size_gt.is_none() {
6248            size_gt = and
6249                .object_size_greater_than
6250                .and_then(|n| u64::try_from(n).ok());
6251        }
6252        if size_lt.is_none() {
6253            size_lt = and
6254                .object_size_less_than
6255                .and_then(|n| u64::try_from(n).ok());
6256        }
6257        if let Some(ts) = &and.tags {
6258            for t in ts {
6259                if let (Some(k), Some(v)) = (t.key.as_ref(), t.value.as_ref()) {
6260                    tags.push((k.clone(), v.clone()));
6261                }
6262            }
6263        }
6264    }
6265    crate::lifecycle::LifecycleFilter {
6266        prefix,
6267        tags,
6268        object_size_greater_than: size_gt,
6269        object_size_less_than: size_lt,
6270    }
6271}
6272
6273fn internal_rule_to_dto(rule: &crate::lifecycle::LifecycleRule) -> LifecycleRule {
6274    let expiration = if rule.expiration_days.is_some() || rule.expiration_date.is_some() {
6275        Some(LifecycleExpiration {
6276            date: rule.expiration_date.map(chrono_utc_to_timestamp),
6277            days: rule.expiration_days.map(|d| d as i32),
6278            expired_object_delete_marker: None,
6279        })
6280    } else {
6281        None
6282    };
6283    let transitions: Option<TransitionList> = if rule.transitions.is_empty() {
6284        None
6285    } else {
6286        Some(
6287            rule.transitions
6288                .iter()
6289                .map(|t| Transition {
6290                    date: None,
6291                    days: Some(t.days as i32),
6292                    storage_class: Some(TransitionStorageClass::from(t.storage_class.clone())),
6293                })
6294                .collect(),
6295        )
6296    };
6297    let noncurrent_version_expiration =
6298        rule.noncurrent_version_expiration_days
6299            .map(|d| NoncurrentVersionExpiration {
6300                newer_noncurrent_versions: None,
6301                noncurrent_days: Some(d as i32),
6302            });
6303    let abort_incomplete_multipart_upload =
6304        rule.abort_incomplete_multipart_upload_days
6305            .map(|d| AbortIncompleteMultipartUpload {
6306                days_after_initiation: Some(d as i32),
6307            });
6308    let filter = if rule.filter.tags.is_empty()
6309        && rule.filter.object_size_greater_than.is_none()
6310        && rule.filter.object_size_less_than.is_none()
6311    {
6312        rule.filter.prefix.as_ref().map(|p| LifecycleRuleFilter {
6313            and: None,
6314            object_size_greater_than: None,
6315            object_size_less_than: None,
6316            prefix: Some(p.clone()),
6317            tag: None,
6318        })
6319    } else if rule.filter.tags.len() == 1
6320        && rule.filter.prefix.is_none()
6321        && rule.filter.object_size_greater_than.is_none()
6322        && rule.filter.object_size_less_than.is_none()
6323    {
6324        let (k, v) = rule.filter.tags[0].clone();
6325        Some(LifecycleRuleFilter {
6326            and: None,
6327            object_size_greater_than: None,
6328            object_size_less_than: None,
6329            prefix: None,
6330            tag: Some(Tag {
6331                key: Some(k),
6332                value: Some(v),
6333            }),
6334        })
6335    } else {
6336        let tags = if rule.filter.tags.is_empty() {
6337            None
6338        } else {
6339            Some(
6340                rule.filter
6341                    .tags
6342                    .iter()
6343                    .map(|(k, v)| Tag {
6344                        key: Some(k.clone()),
6345                        value: Some(v.clone()),
6346                    })
6347                    .collect(),
6348            )
6349        };
6350        Some(LifecycleRuleFilter {
6351            and: Some(LifecycleRuleAndOperator {
6352                object_size_greater_than: rule
6353                    .filter
6354                    .object_size_greater_than
6355                    .and_then(|n| i64::try_from(n).ok()),
6356                object_size_less_than: rule
6357                    .filter
6358                    .object_size_less_than
6359                    .and_then(|n| i64::try_from(n).ok()),
6360                prefix: rule.filter.prefix.clone(),
6361                tags,
6362            }),
6363            object_size_greater_than: None,
6364            object_size_less_than: None,
6365            prefix: None,
6366            tag: None,
6367        })
6368    };
6369    LifecycleRule {
6370        abort_incomplete_multipart_upload,
6371        expiration,
6372        filter,
6373        id: if rule.id.is_empty() {
6374            None
6375        } else {
6376            Some(rule.id.clone())
6377        },
6378        noncurrent_version_expiration,
6379        noncurrent_version_transitions: None,
6380        prefix: None,
6381        status: ExpirationStatus::from(rule.status.as_aws_str().to_owned()),
6382        transitions,
6383    }
6384}
6385
6386// (timestamp <-> chrono helpers `timestamp_to_chrono_utc` /
6387// `chrono_utc_to_timestamp` are defined earlier in this file for the
6388// tagging/notifications work; the lifecycle DTO converters reuse them.)
6389
6390// ---------------------------------------------------------------------------
6391// v0.5 #33: SigV4a (asymmetric ECDSA-P256) integration hook.
6392//
6393// Kept as a self-contained block at the bottom of the file so it doesn't
6394// touch the existing `S4Service` struct, `new()`, or any of the per-op
6395// handlers above. The hook is wired in by the binary at server-build time
6396// as a hyper middleware layer (see `main.rs`), NOT inside `S4Service`.
6397//
6398// Lifecycle:
6399//   1. `SigV4aGate::new(store)` is constructed once at boot from the
6400//      operator-supplied credential directory.
6401//   2. For each incoming request, `SigV4aGate::pre_route(&req,
6402//      &requested_region, &canonical_request_bytes)` is invoked BEFORE
6403//      the request hits the S3 framework. If the request claims SigV4a
6404//      and verifies, control returns to the framework. Otherwise a 403
6405//      `SignatureDoesNotMatch` is produced.
6406//   3. Plain SigV4 (HMAC-SHA256) requests pass through untouched.
6407// ---------------------------------------------------------------------------
6408
/// Gate that fronts the S3 service path with SigV4a verification (v0.5 #33).
///
/// Wraps a [`crate::sigv4a::SigV4aCredentialStore`] and exposes a single
/// `pre_route` entry point that returns `Ok(())` for both
/// "request is plain SigV4 — pass through" and "request is SigV4a and
/// verified", and an `Err(...)` containing a 403-equivalent diagnostic
/// otherwise. Cheap to clone (the inner store is `Arc`-backed).
///
/// v0.8.4 #76 (audit H-6): the gate now enforces an `x-amz-date`
/// freshness window (default 15 min, AWS-spec) and a strict credential
/// scope shape (`<key>/<YYYYMMDD>/s3/aws4_request`), shutting the
/// captured-request replay vector — previously a stolen valid SigV4a
/// signature could be replayed indefinitely (including DELETE).
#[derive(Debug, Clone)]
pub struct SigV4aGate {
    /// Shared credential store the gate resolves access-key-ids against
    /// during `pre_route`; shared (`Arc`-backed), so cloning the gate is
    /// cheap.
    store: crate::sigv4a::SharedSigV4aCredentialStore,
    /// v0.8.4 #76: how far the request's `x-amz-date` may drift from
    /// the server's clock before being rejected with 403
    /// `RequestTimeTooSkewed`. Matches the AWS S3 spec default of
    /// 15 min when constructed via [`SigV4aGate::new`]; the operator
    /// can override via [`SigV4aGate::with_skew_tolerance`] (CLI flag
    /// `--sigv4a-skew-tolerance-seconds`).
    skew_tolerance: chrono::Duration,
}
6433
6434impl SigV4aGate {
6435    /// Default `x-amz-date` skew tolerance — 15 min, matching AWS S3.
6436    pub const DEFAULT_SKEW_TOLERANCE_SECS: i64 = 900;
6437
6438    #[must_use]
6439    pub fn new(store: crate::sigv4a::SharedSigV4aCredentialStore) -> Self {
6440        Self {
6441            store,
6442            skew_tolerance: chrono::Duration::seconds(Self::DEFAULT_SKEW_TOLERANCE_SECS),
6443        }
6444    }
6445
6446    /// v0.8.4 #76: override the `x-amz-date` skew tolerance (default
6447    /// 15 min). Operators can widen this for high-clock-drift
6448    /// environments or tighten it for compliance regimes that demand
6449    /// stricter freshness.
6450    #[must_use]
6451    pub fn with_skew_tolerance(mut self, skew: chrono::Duration) -> Self {
6452        self.skew_tolerance = skew;
6453        self
6454    }
6455
6456    /// Read the configured skew tolerance — exposed mostly for test +
6457    /// observability use.
6458    #[must_use]
6459    pub fn skew_tolerance(&self) -> chrono::Duration {
6460        self.skew_tolerance
6461    }
6462
6463    /// Inspect an incoming HTTP request. Behaviour:
6464    ///
6465    /// - Not SigV4a (no `X-Amz-Region-Set` and no SigV4a `Authorization`
6466    ///   prefix) → returns `Ok(())`; the framework's existing SigV4
6467    ///   path handles the request.
6468    /// - SigV4a + valid signature + region match + fresh x-amz-date
6469    ///   → `Ok(())`.
6470    /// - SigV4a + unknown access-key-id → `Err` with `InvalidAccessKeyId`.
6471    /// - SigV4a + bad signature / region mismatch → `Err` with
6472    ///   `SignatureDoesNotMatch`.
6473    /// - SigV4a + missing or skewed `x-amz-date` → `Err` with one of
6474    ///   the v0.8.4 #76 freshness variants (`RequestTimeTooSkewed`
6475    ///   et al.).
6476    ///
6477    /// `canonical_request_bytes` is the SigV4a string-to-sign (or
6478    /// canonical-request bytes; the caller decides) that the framework
6479    /// has already produced for this request. Keeping it as a parameter
6480    /// instead of rebuilding it inside the hook avoids duplicating the
6481    /// canonicalisation logic.
6482    pub fn pre_route<B>(
6483        &self,
6484        req: &http::Request<B>,
6485        requested_region: &str,
6486        canonical_request_bytes: &[u8],
6487    ) -> Result<(), SigV4aGateError> {
6488        self.pre_route_at(
6489            req,
6490            requested_region,
6491            canonical_request_bytes,
6492            chrono::Utc::now(),
6493        )
6494    }
6495
6496    /// Like [`SigV4aGate::pre_route`] but takes an explicit `now` for
6497    /// tests that need to pin the freshness clock. Production callers
6498    /// use `pre_route` (which calls `chrono::Utc::now()`).
6499    pub fn pre_route_at<B>(
6500        &self,
6501        req: &http::Request<B>,
6502        requested_region: &str,
6503        canonical_request_bytes: &[u8],
6504        now: chrono::DateTime<chrono::Utc>,
6505    ) -> Result<(), SigV4aGateError> {
6506        if !crate::sigv4a::detect(req) {
6507            return Ok(());
6508        }
6509        let auth_hdr = req
6510            .headers()
6511            .get(http::header::AUTHORIZATION)
6512            .and_then(|v| v.to_str().ok())
6513            .ok_or(SigV4aGateError::MissingAuthorization)?;
6514        let parsed = crate::sigv4a::parse_authorization_header(auth_hdr)
6515            .map_err(|_| SigV4aGateError::MalformedAuthorization)?;
6516        let region_set = req
6517            .headers()
6518            .get(crate::sigv4a::REGION_SET_HEADER)
6519            .and_then(|v| v.to_str().ok())
6520            .unwrap_or("*");
6521        let key = self
6522            .store
6523            .get(&parsed.access_key_id)
6524            .ok_or_else(|| SigV4aGateError::UnknownAccessKey(parsed.access_key_id.clone()))?;
6525        // v0.8.4 #76: snapshot the request headers into a
6526        // lowercase-keyed flat map so `verify_request` can do the
6527        // x-amz-date freshness checks without taking a generic
6528        // `HeaderMap` dep. Cheap because the headers list is tiny.
6529        let mut header_map: std::collections::HashMap<String, String> =
6530            std::collections::HashMap::with_capacity(req.headers().len());
6531        for (name, value) in req.headers() {
6532            if let Ok(v) = value.to_str() {
6533                header_map.insert(name.as_str().to_ascii_lowercase(), v.to_string());
6534            }
6535        }
6536        crate::sigv4a::verify_request(
6537            &parsed,
6538            &header_map,
6539            canonical_request_bytes,
6540            key,
6541            region_set,
6542            requested_region,
6543            now,
6544            self.skew_tolerance,
6545        )
6546        .map_err(SigV4aGateError::Verify)?;
6547        Ok(())
6548    }
6549}
6550
/// Failure modes from [`SigV4aGate::pre_route`].
///
/// Each variant maps to an AWS-standard error code via
/// [`SigV4aGateError::s3_error_code`] (`InvalidAccessKeyId` /
/// `SignatureDoesNotMatch` / `RequestTimeTooSkewed` / `InvalidRequest`)
/// and to an HTTP status via [`SigV4aGateError::http_status`] —
/// mostly 403, with client-side format mistakes surfacing as 400.
#[derive(Debug, thiserror::Error)]
pub enum SigV4aGateError {
    /// SigV4a-shaped request carried no usable `Authorization` header.
    #[error("missing Authorization header")]
    MissingAuthorization,
    /// `Authorization` header present but not parseable as SigV4a.
    #[error("malformed SigV4a Authorization header")]
    MalformedAuthorization,
    /// The credential's access-key-id was not found in the gate's store.
    #[error("unknown SigV4a access-key-id: {0}")]
    UnknownAccessKey(String),
    /// Signature / freshness / scope verification failed; the inner
    /// [`crate::sigv4a::SigV4aError`] carries the precise reason.
    #[error("SigV4a verification failed: {0}")]
    Verify(#[source] crate::sigv4a::SigV4aError),
}
6566
6567impl SigV4aGateError {
6568    /// AWS S3 error code that should accompany the response.
6569    ///
6570    /// v0.8.4 #76 (audit H-6): the freshness check surfaces
6571    /// `RequestTimeTooSkewed` (matches AWS spec); date / scope shape
6572    /// failures surface as `InvalidRequest` (400); other failures stay
6573    /// `SignatureDoesNotMatch` / `InvalidAccessKeyId` (403) so the wire
6574    /// surface stays AWS-compatible.
6575    #[must_use]
6576    pub fn s3_error_code(&self) -> &'static str {
6577        match self {
6578            Self::UnknownAccessKey(_) => "InvalidAccessKeyId",
6579            Self::Verify(crate::sigv4a::SigV4aError::RequestTimeTooSkewed { .. }) => {
6580                "RequestTimeTooSkewed"
6581            }
6582            Self::Verify(
6583                crate::sigv4a::SigV4aError::MissingXAmzDate
6584                | crate::sigv4a::SigV4aError::InvalidDateFormat
6585                | crate::sigv4a::SigV4aError::DateScopeMismatch
6586                | crate::sigv4a::SigV4aError::XAmzDateNotSigned
6587                | crate::sigv4a::SigV4aError::InvalidTerminator
6588                | crate::sigv4a::SigV4aError::WrongService { .. }
6589                | crate::sigv4a::SigV4aError::InvalidCredentialScope,
6590            ) => "InvalidRequest",
6591            _ => "SignatureDoesNotMatch",
6592        }
6593    }
6594
6595    /// HTTP status code to accompany the response. v0.8.4 #76: format
6596    /// errors that are clearly client mistakes (missing / malformed
6597    /// `x-amz-date`, malformed credential scope, wrong service) are
6598    /// surfaced as 400 InvalidRequest; the rest stay 403.
6599    #[must_use]
6600    pub fn http_status(&self) -> http::StatusCode {
6601        match self {
6602            Self::Verify(
6603                crate::sigv4a::SigV4aError::MissingXAmzDate
6604                | crate::sigv4a::SigV4aError::InvalidDateFormat
6605                | crate::sigv4a::SigV4aError::DateScopeMismatch
6606                | crate::sigv4a::SigV4aError::XAmzDateNotSigned
6607                | crate::sigv4a::SigV4aError::InvalidTerminator
6608                | crate::sigv4a::SigV4aError::WrongService { .. }
6609                | crate::sigv4a::SigV4aError::InvalidCredentialScope,
6610            ) => http::StatusCode::BAD_REQUEST,
6611            _ => http::StatusCode::FORBIDDEN,
6612        }
6613    }
6614}
6615
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn manifest_roundtrip_via_metadata() {
        // PUT writes the manifest into S3 metadata; GET must read back
        // an identical manifest.
        let manifest = ChunkManifest {
            codec: CodecKind::CpuZstd,
            original_size: 1234,
            compressed_size: 567,
            crc32c: 0xdead_beef,
        };
        let mut meta: Option<Metadata> = None;
        write_manifest(&mut meta, &manifest);
        let roundtripped = extract_manifest(&meta).expect("manifest must round-trip");
        assert_eq!(roundtripped.codec, manifest.codec);
        assert_eq!(roundtripped.original_size, manifest.original_size);
        assert_eq!(roundtripped.compressed_size, manifest.compressed_size);
        assert_eq!(roundtripped.crc32c, manifest.crc32c);
    }

    #[test]
    fn missing_metadata_yields_none() {
        // No metadata at all → no manifest.
        let meta: Option<Metadata> = None;
        assert!(extract_manifest(&meta).is_none());
    }

    #[test]
    fn partial_metadata_yields_none() {
        // Only one of the manifest keys present → treated as absent.
        let mut meta = Metadata::new();
        meta.insert(META_CODEC.into(), "cpu-zstd".into());
        assert!(extract_manifest(&Some(meta)).is_none());
    }

    #[test]
    fn parse_copy_source_range_basic() {
        let range = parse_copy_source_range("bytes=10-20").unwrap();
        let s3s::dto::Range::Int { first, last } = range else {
            panic!("expected Int range");
        };
        assert_eq!(first, 10);
        assert_eq!(last, Some(20));
    }

    #[test]
    fn parse_copy_source_range_rejects_inverted() {
        let msg = parse_copy_source_range("bytes=20-10").unwrap_err();
        assert!(msg.contains("last < first"));
    }

    #[test]
    fn parse_copy_source_range_rejects_missing_prefix() {
        let msg = parse_copy_source_range("10-20").unwrap_err();
        assert!(msg.contains("must start with 'bytes='"));
    }

    #[test]
    fn parse_copy_source_range_rejects_open_ended() {
        // The upload_part_copy header only allows the closed N-M form;
        // suffix ("bytes=-N") and open-ended ("bytes=N-") are rejected.
        for bad in ["bytes=10-", "bytes=-10"] {
            assert!(parse_copy_source_range(bad).is_err());
        }
    }

    // v0.7 #49: safe_object_uri must round-trip every legal S3 key
    // (spaces, slashes, control chars, raw UTF-8) into a parseable
    // `http::Uri` instead of panicking the way the previous
    // `format!(...).parse().unwrap()` call sites did.

    #[test]
    fn safe_object_uri_basic_ascii() {
        let uri = safe_object_uri("bucket", "key").expect("ascii must be safe");
        assert_eq!(uri.path(), "/bucket/key");
    }

    #[test]
    fn safe_object_uri_encodes_spaces() {
        let uri = safe_object_uri("bucket", "key with spaces").expect("must encode spaces");
        let path = uri.path();
        // RFC 3986 path-segment encoding maps ' ' to %20.
        assert!(
            path.contains("%20"),
            "expected percent-encoded space, got {}",
            path
        );
        assert!(path.starts_with("/bucket/"));
    }

    #[test]
    fn safe_object_uri_preserves_slashes() {
        // '/' is a legal S3 key character acting as a logical path
        // separator; escaping it would change the perceived hierarchy,
        // so the helper must leave it alone.
        let uri = safe_object_uri("bucket", "key/with/slashes").expect("slashes must round-trip");
        assert_eq!(uri.path(), "/bucket/key/with/slashes");
    }

    #[test]
    fn safe_object_uri_handles_newline_without_panic() {
        // A newline may come back encoded (%0A) or rejected by the
        // parser; either outcome is fine — the one hard requirement
        // here is "no panic".
        let _ = safe_object_uri("bucket", "key\n");
    }

    #[test]
    fn safe_object_uri_handles_null_byte_without_panic() {
        let _ = safe_object_uri("bucket", "key\0bad");
    }

    #[test]
    fn safe_object_uri_handles_unicode_without_panic() {
        // RTL override, BOM, plain Japanese — none should panic.
        for key in ["rtl\u{202E}override", "\u{FEFF}bom-key", "日本語キー"] {
            let _ = safe_object_uri("bucket", key);
        }
    }

    #[test]
    fn safe_object_uri_no_panic_for_every_byte() {
        // Exhaustive 1-byte keys 0x00..=0xFF. Bytes 0x80..=0xFF are not
        // valid UTF-8 on their own, so `from_utf8_lossy` hands the
        // helper a real `&str` (replacement char) regardless of the
        // raw byte; none of these may panic.
        for byte in u8::MIN..=u8::MAX {
            let key = String::from_utf8_lossy(&[byte]).into_owned();
            let _ = safe_object_uri("bucket", &key);
        }
    }

    /// v0.8.1 #58: smoke test for the DEK-handling shape used by the
    /// SSE-KMS branches of `put_object` and `complete_multipart_upload`:
    /// generate_dek → length check → copy into a stack `[u8; 32]` →
    /// reborrow as `&[u8; 32]` for `SseSource`, without spinning up a
    /// full `S4Service`.
    ///
    /// Guards against a regression where the `Zeroizing` wrapper is
    /// accidentally dropped before the stack copy lands (e.g. a
    /// refactor to `let dek = kms.generate_dek(...).await?.0;
    /// drop(dek); ...`) or where the `&**dek` reborrow is rewritten
    /// into something that no longer compiles.
    #[tokio::test]
    async fn kms_dek_lifetime_within_function_scope() {
        use crate::kms::{KmsBackend, LocalKms};
        use std::collections::HashMap;
        use std::path::PathBuf;
        use zeroize::Zeroizing;

        let mut keks = HashMap::new();
        keks.insert("scope".to_string(), [33u8; 32]);
        let kms = LocalKms::from_keks(PathBuf::from("/tmp/kms-scope-test"), keks);

        // Same shape as the put_object KMS branch.
        let (dek, wrapped) = kms.generate_dek("scope").await.unwrap();
        assert_eq!(dek.len(), 32);
        let mut dek_arr: Zeroizing<[u8; 32]> = Zeroizing::new([0u8; 32]);
        dek_arr.copy_from_slice(&dek);

        // The reborrow used at the SseSource construction site: a
        // `&[u8; 32]` auto-derefed out of a `Zeroizing<[u8; 32]>`.
        let dek_ref: &[u8; 32] = &dek_arr;
        // Sanity: the reborrow points at the same bytes.
        assert_eq!(dek_ref, &*dek_arr);
        // Wrapped key id flows through unchanged.
        assert_eq!(wrapped.key_id, "scope");

        // Both `dek` (Zeroizing<Vec<u8>>) and `dek_arr`
        // (Zeroizing<[u8; 32]>) drop at end of scope, wiping the
        // backing memory. The wipe itself can't be asserted (reading
        // freed memory would be UB); this test only pins the call
        // shape — the wipe is covered by `zeroize`'s own test suite.
    }
}