s4_codec/
index.rs

1//! Frame index — Range GET の partial fetch を可能にするための sidecar object 形式。
2//!
3//! ## 課題
4//!
5//! S4-multipart object は `[S4F2 frame]([S4P1 padding][S4F2 frame])*` のシーケンス。
6//! Range GET (e.g. `bytes=N-M`) を効率的に処理するには、(a) どの frame が
7//! decompressed offset N..M に対応しているか、(b) その frame は object body の
8//! どこ (compressed_offset) から始まるか、を知る必要がある。
9//!
10//! ## 解決策
11//!
12//! `<key>.s4index` という sidecar object に下記の binary index を書く:
13//!
14//! ```text
15//! ┌──── v1 32 byte header ─┐
16//! │ S4IX magic (4)         │
17//! │ version u32 (4)        │
18//! │ total_frames u64 (8)   │
19//! │ total_original u64 (8) │
20//! │ total_padded u64 (8)   │  ← S3 上の object サイズ (padding 含む)
21//! └────────────────────────┘
22//! 各 frame について 32 byte:
23//!   original_offset  u64 LE
24//!   original_size    u64 LE
25//!   compressed_offset u64 LE  ← S3 object body における frame header の開始位置
26//!   compressed_size  u64 LE   ← header (28 byte) + payload の合計
27//! ```
28//!
29//! 1000 frame で 32 KB、10000 frame で 320 KB。10 万 frame でも 3.2 MB に収まる。
30//!
31//! ## 使い方
32//!
33//! - PUT: 1 frame の単純 index、PUT 完了後に sidecar 書込
34//! - CompleteMultipartUpload: object 全体を一度 fetch + scan して index を構築
35//! - Range GET: sidecar fetch → `lookup_range(start, end)` で frame 範囲 + S3 byte 範囲を取得
36//!   → backend に partial Range GET → frame parse → decompress → slice
37//!
38//! ## v0.8.4 #73 H-2: source object version binding (v2 header)
39//!
40//! v1 では sidecar に source object の identity が無いため、object overwrite 後に
41//! sidecar が stale のままだと Range GET が **間違った frame** を返す危険があった
42//! (古い byte offset で新 object を partial GET する hazard)。攻撃者が backend を
43//! 直接触れる脅威モデルでは、偽 sidecar を仕込めば任意 frame を露呈させ得る。
44//!
45//! 対策として v2 header に `source_etag` と `source_compressed_size` を追加。GET
46//! 側は HEAD で current etag を取って一致確認 → 不一致なら sidecar を信用せず full
47//! GET path に fall back する。
48//!
49//! ```text
50//! ┌──── v2 header (variable) ┐
51//! │ S4IX magic (4)           │
52//! │ version u32 (4) = 2      │
53//! │ total_frames u64 (8)     │
54//! │ total_original u64 (8)   │
55//! │ total_padded u64 (8)     │
56//! │ source_compressed_size u64 (8)  ← v2 で追加
57//! │ etag_len u32 (4)                 ← v2 で追加 (UTF-8 byte length, 0 = absent)
58//! │ etag bytes (etag_len)            ← v2 で追加 (RFC 7232 entity-tag, quotes 含む)
59//! └──────────────────────────┘
60//! ```
61//!
62//! - **back-compat**: v1 sidecar が backend に既存していれば read-only で `decode_index`
63//!   が `source_etag = None`, `source_compressed_size = None` で復元する。GET 側は
64//!   `None` を見たら "legacy sidecar — verify skip, full GET にも fallback できる"
65//!   と扱う (= 既存挙動保持)。
66//! - **新規 PUT**: 常に v2 を書く。`source_etag` は backend response の e_tag、
67//!   `source_compressed_size` は put body 長 (= `total_padded_size`) が原則。
68//!
69//! ## v0.9 #106 encryption-aware sidecar (v3 header)
70//!
71//! v0.8.12 #120 で SSE 有効時の sidecar emission を全 skip にしたため、SSE-S4 /
72//! SSE-KMS / SSE-C いずれかが有効な object の Range GET は **常に buffered fallback**
73//! (full body fetch → 全 decrypt → frame parse → slice) になっていた。5 GiB の
74//! SSE-S4 object に 100 byte の Range GET を投げると 5 GiB 転送が発生する。
75//!
76//! v0.9 #106 はこれを SSE-S4 chunked (S4E6 / `--sse-chunk-size > 0`) **だけ** で
77//! 解消する。S4E6 は per-chunk 独立 decrypt 可能なので、必要な chunk 範囲だけを
78//! backend に partial GET → 該当 chunk(s) を decrypt → frame parse → slice という
79//! partial-fetch 経路が成立する。SSE-KMS / SSE-C / S4E2 buffered は引き続き
80//! v0.8.12 #120 の buffered fallback (= sidecar 全 skip)。
81//!
82//! v3 header は v2 の etag 末尾に SSE chunk geometry block を 30 byte 追加した形:
83//!
84//! ```text
85//! ┌──── v3 header (variable) ──┐
86//! │ S4IX magic (4)             │
87//! │ version u32 (4) = 3        │
88//! │ total_frames u64 (8)       │
89//! │ total_original u64 (8)     │
90//! │ total_padded u64 (8)       │   ← S3 上の post-encrypt object サイズ
91//! │ source_compressed_size u64 │   ← v2 から継承
92//! │ etag_len u32 (4)           │
93//! │ etag bytes (etag_len)      │
94//! │ ── v3 SSE block (30 B) ─── │   ← v3 で追加
95//! │ enc_chunk_size  u32 LE (4) │   ← S4E6 plaintext bytes per chunk
96//! │ enc_chunk_count u32 LE (4) │
97//! │ enc_key_id      u16 LE (2) │
98//! │ enc_salt        [u8; 8]    │   ← S4E6 per-PUT salt (nonce derivation 用)
99//! │ enc_plaintext_len u64 LE   │   ← pre-encrypt body 長 (= post-compress)
100//! │ enc_header_bytes u32 LE    │   ← 24 (S4E6_HEADER_BYTES)
101//! └────────────────────────────┘
102//! ```
103//!
104//! v3 では `compressed_offset` / `compressed_size` は **pre-encrypt body** (=
105//! post-compress framed body) の offset のまま。 GET 側は、その offset を
106//! `enc_chunk_size` で割って enclosing chunk index を計算し、そのチャンク群だけを
107//! backend から partial GET、decrypt、frame parse、slice する。
108//!
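//! - **back-compat**: the bytes decoded for a v1 / v2 sidecar are unchanged.
//!   In `decode_index` the v3 case shares the match arm with v2
//!   (`v == INDEX_VERSION_V2 || v == INDEX_VERSION`) and only adds the read of the
//!   trailing 30-byte SSE block when `version == 3`. A v3 sidecar fetched by a
//!   v0.8.x server (which only knows v1 / v2) surfaces as `UnsupportedVersion(3)`,
//!   so the sidecar is ignored and the request drops to the existing buffered
//!   fallback; nothing breaks.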
113//! - **GET path**: v3 sidecar の `enc_chunk_size > 0` を見たら encrypted Range GET
114//!   fast-path 起動。`enc_chunk_size == 0` は「非 SSE で v3 を書きたい場合」用の
115//!   将来余地 (現状 server は SSE-S4 chunked 専用に v3 を emit、 他は v2 のまま)。
116//! - **scope out**: SSE-KMS / SSE-C / S4E2 buffered (`--sse-chunk-size 0`) /
117//!   multipart は v3 sidecar を emit しない (= 既存 v0.8.12 #120 挙動保持、
118//!   buffered fallback 経路)。 v0.10+ の roadmap。
119
120use bytes::{Buf, BufMut, Bytes, BytesMut};
121use thiserror::Error;
122
/// Four magic bytes that open every sidecar index. `decode_index` checks
/// them before it looks at the version field, so they are common to all
/// header versions.
pub const INDEX_MAGIC: &[u8; 4] = b"S4IX";
/// v0.9 #106: bumped 2 → 3. v3 appends a 30-byte SSE chunk-geometry
/// block (enc_chunk_size, enc_chunk_count, enc_key_id, enc_salt,
/// enc_plaintext_len, enc_header_bytes) after the v2 etag payload so
/// the GET path can compute encrypted byte ranges for SSE-S4 chunked
/// (S4E6) objects and run partial-fetch + per-chunk decrypt without a
/// full body read. v1 / v2 readers stay as back-compat paths
/// (`decode_index` dispatches on the version field — a v3 sidecar
/// read by a v0.8.x server surfaces as `UnsupportedVersion(3)` and
/// drops out to the existing buffered fallback).
pub const INDEX_VERSION: u32 = 3;
/// v0.8.4 #73 H-2 era version. Still the version [`encode_index`]
/// writes whenever the index carries no SSE binding: the encoder
/// emits v3 only when `FrameIndex::sse_v3` is `Some(..)` and v2
/// otherwise. Decode-side keeps reading both.
pub const INDEX_VERSION_V2: u32 = 2;
/// Legacy v1 version number — kept for tests / back-compat readers.
pub const INDEX_VERSION_V1: u32 = 1;
/// v1 fixed header size in bytes: magic (4) + version u32 (4) +
/// total_frames u64 (8) + total_original u64 (8) + total_padded u64 (8).
/// Also the minimum buffer length `decode_index` accepts for any version.
pub const HEADER_FIXED_V1: usize = 4 + 4 + 8 + 8 + 8; // 32
/// v2 fixed header layout (`HEADER_FIXED_V1` + `source_compressed_size` u64 +
/// `etag_len` u32). The variable-length `etag` payload follows.
pub const HEADER_FIXED_V2: usize = HEADER_FIXED_V1 + 8 + 4; // 44
/// v0.9 #106: v3 SSE chunk-geometry block size (always appended after the
/// v2 etag payload when `version == 3`). Fields: `enc_chunk_size u32 +
/// enc_chunk_count u32 + enc_key_id u16 + enc_salt [u8;8] +
/// enc_plaintext_len u64 + enc_header_bytes u32`.
pub const SSE_BLOCK_V3: usize = 4 + 4 + 2 + 8 + 8 + 4; // = 30
/// v0.8.16 F-15: kept for back-compat with external consumers that
/// imported the v0.8.10-era constant. **DEPRECATED** — the value
/// `40` was a typo (it should have been `44` for the v2 fixed
/// header). Use [`HEADER_FIXED_V1`] / [`HEADER_FIXED_V2`] directly.
#[deprecated(
    since = "0.8.16",
    note = "INDEX_HEADER_BYTES was an off-by-4 typo; use HEADER_FIXED_V1 or HEADER_FIXED_V2 instead"
)]
pub const INDEX_HEADER_BYTES: usize = HEADER_FIXED_V2;
/// Encoded size of one frame entry: four little-endian `u64` fields
/// (original_offset, original_size, compressed_offset, compressed_size).
pub const ENTRY_BYTES: usize = 8 + 8 + 8 + 8;
161
/// One row of the frame index: where a single frame sits in the
/// decompressed stream and where its bytes live in the stored object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FrameIndexEntry {
    /// Start of the decompressed byte range this frame covers
    /// (cumulative, 0-based).
    pub original_offset: u64,
    /// Number of bytes after decompression (same value as the frame
    /// header's `original_size`).
    pub original_size: u64,
    /// Position inside the S3 object body where the frame starts (the
    /// first byte of the S4F2 magic).
    pub compressed_offset: u64,
    /// Byte length of the whole frame (28-byte header + payload).
    pub compressed_size: u64,
}

impl FrameIndexEntry {
    /// Exclusive end of the decompressed range, clamped to `u64::MAX`.
    ///
    /// v0.8.15 H-a: this used to be a plain `original_offset +
    /// original_size`, which panics in `dev` (workspace
    /// `overflow_checks = true`) and wraps in release for an
    /// attacker-supplied entry such as `original_offset = u64::MAX - 10`,
    /// `original_size = 100`. `decode_index` pre-validates entries as
    /// well, so the saturation here is defence-in-depth: a corrupted
    /// in-memory entry cannot crash the gateway through
    /// `binary_search_by`.
    pub fn original_end(&self) -> u64 {
        let Self {
            original_offset: start,
            original_size: len,
            ..
        } = *self;
        start.saturating_add(len)
    }

    /// Exclusive end of the frame inside the stored object body, clamped
    /// to `u64::MAX` in the same way as [`Self::original_end`].
    pub fn compressed_end(&self) -> u64 {
        let Self {
            compressed_offset: start,
            compressed_size: len,
            ..
        } = *self;
        start.saturating_add(len)
    }
}
189
/// In-memory form of a sidecar index: the per-frame table plus the
/// bindings that tie the sidecar to the object it describes.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FrameIndex {
    /// Total size of the object as stored on S3 (padding frames included).
    pub total_padded_size: u64,
    /// One entry per frame. `lookup_range` binary-searches this list, so
    /// it is expected to be ordered by ascending `original_offset`.
    pub entries: Vec<FrameIndexEntry>,
    /// v0.8.4 #73 H-2: backend-reported ETag of the source object the
    /// sidecar describes. Populated by `s4-server::put_object` from the
    /// backend's PUT response so the matching GET can `head_object` and
    /// confirm it's still talking about the same body. `None` for legacy
    /// (v1) sidecars decoded out of an existing backend, in which case
    /// the GET path treats the partial-fetch as best-effort and falls
    /// back to a full read on any inconsistency signal.
    pub source_etag: Option<String>,
    /// v0.8.4 #73 H-2: backend object's compressed bytes length the sidecar
    /// was computed against. Cross-check signal alongside `source_etag` —
    /// some backends (lifecycle moves, multi-object operations) can change
    /// the bytes without a fresh ETag, so a size mismatch is independently
    /// load-bearing. `None` on legacy v1 sidecars.
    pub source_compressed_size: Option<u64>,
    /// v0.9 #106: SSE-S4 chunked (S4E6) geometry binding. `Some(..)` if
    /// the source object was encrypted with `--sse-chunk-size > 0` —
    /// in that case the on-disk body is an S4E6 frame and the GET path
    /// can map `compressed_offset` (pre-encrypt) → enclosing chunk
    /// index → encrypted byte range, fetch just those chunks, and
    /// decrypt them independently. `None` for v1 / v2 sidecars and
    /// for v3 sidecars written under non-SSE / SSE-KMS / SSE-C /
    /// S4E2-buffered (which keep the v0.8.12 #120 buffered fallback).
    pub sse_v3: Option<SseChunkBinding>,
}
219
/// v0.9 #106: per-object SSE-S4 chunked (S4E6) geometry, stored in v3
/// sidecars. The salt + key_id let the GET path derive the per-chunk
/// AES-GCM nonce + AAD without re-fetching the encrypted body's
/// header bytes. Salt is **not secret** (it lives in the on-disk
/// S4E6 header in plaintext anyway); duplicating it here saves one
/// HEAD/GET round-trip per Range request.
///
/// All fields are copied verbatim from the sidecar's 30-byte SSE block,
/// so treat them as untrusted input when doing arithmetic on them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SseChunkBinding {
    /// S4E6 plaintext bytes per chunk (matches `--sse-chunk-size`).
    /// Must be `> 0` for an SSE binding to be considered valid;
    /// `decode_index` yields no binding at all when the stored value
    /// is zero.
    pub enc_chunk_size: u32,
    /// Total number of S4E6 chunks in the body (= `ceil(plaintext.len() /
    /// enc_chunk_size)`, always `>= 1`). A stored zero likewise makes
    /// `decode_index` drop the binding.
    pub enc_chunk_count: u32,
    /// Keyring slot the active key was at PUT time. The GET path uses
    /// it to look up the same key for decrypt.
    pub enc_key_id: u16,
    /// 8-byte per-PUT random salt (S4E6). Fed into the nonce + AAD
    /// derivation; lives in the encrypted body's header in plaintext
    /// anyway, so duplicating it in the sidecar leaks nothing.
    pub enc_salt: [u8; 8],
    /// Pre-encrypt plaintext body length (= post-compress, post-frame
    /// body length). Used to validate the chunk-walk math against the
    /// encrypted body length the backend reports.
    pub enc_plaintext_len: u64,
    /// Fixed header size of the S4E6 frame (== `S4E6_HEADER_BYTES` =
    /// 24 today). Carried explicitly so a future S4E7-style bump
    /// doesn't silently break v3 sidecar decode.
    pub enc_header_bytes: u32,
}
250
251impl FrameIndex {
252    pub fn total_original_size(&self) -> u64 {
253        self.entries.last().map(|e| e.original_end()).unwrap_or(0)
254    }
255
256    /// Range request `[start, end_exclusive)` を解決して必要 frame の (start_idx, end_idx_exclusive)
257    /// と S3 上の partial-fetch byte range `[byte_start, byte_end_exclusive)` を返す。
258    ///
259    /// 1 frame でもオーバーラップしていればその frame の **全 byte** を fetch する
260    /// (= 部分 frame は decompress 単位)。
261    pub fn lookup_range(&self, start: u64, end_exclusive: u64) -> Option<RangePlan> {
262        if self.entries.is_empty() || start >= end_exclusive {
263            return None;
264        }
265        let total = self.total_original_size();
266        if start >= total {
267            return None;
268        }
269        let clamped_end = end_exclusive.min(total);
270
271        // start を含む frame を二分探索 (entries は original_offset 昇順)
272        let first_idx = match self.entries.binary_search_by(|e| {
273            if e.original_end() <= start {
274                std::cmp::Ordering::Less
275            } else if e.original_offset > start {
276                std::cmp::Ordering::Greater
277            } else {
278                std::cmp::Ordering::Equal
279            }
280        }) {
281            Ok(i) => i,
282            Err(_) => return None,
283        };
284        // end を含む frame (end-1 を含むもの)
285        let last_inclusive = clamped_end - 1;
286        let last_idx = match self.entries.binary_search_by(|e| {
287            if e.original_end() <= last_inclusive {
288                std::cmp::Ordering::Less
289            } else if e.original_offset > last_inclusive {
290                std::cmp::Ordering::Greater
291            } else {
292                std::cmp::Ordering::Equal
293            }
294        }) {
295            Ok(i) => i,
296            Err(_) => return None,
297        };
298
299        let byte_start = self.entries[first_idx].compressed_offset;
300        let byte_end_exclusive = self.entries[last_idx].compressed_end();
301        Some(RangePlan {
302            first_frame_idx: first_idx,
303            last_frame_idx_inclusive: last_idx,
304            byte_start,
305            byte_end_exclusive,
306            // slice 開始 / 終了の original 内 offset
307            slice_start_in_combined: start - self.entries[first_idx].original_offset,
308            slice_end_in_combined: clamped_end - self.entries[first_idx].original_offset,
309        })
310    }
311}
312
/// Result of `lookup_range`. Fetch `byte_start..byte_end_exclusive` from
/// S3, decompress the frames it contains, then slice the combined
/// decompressed bytes with `[slice_start_in_combined,
/// slice_end_in_combined)` to obtain the final result.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RangePlan {
    /// Index (into `FrameIndex::entries`) of the frame that contains the
    /// first requested byte.
    pub first_frame_idx: usize,
    /// Index of the frame that contains the last requested byte
    /// (inclusive).
    pub last_frame_idx_inclusive: usize,
    /// `compressed_offset` of the first frame: where the partial fetch
    /// starts in the stored object body.
    pub byte_start: u64,
    /// `compressed_end()` of the last frame: exclusive end of the partial
    /// fetch.
    pub byte_end_exclusive: u64,
    /// Requested start, relative to the first fetched frame's
    /// `original_offset`.
    pub slice_start_in_combined: u64,
    /// Requested end (exclusive, already clamped to the total original
    /// size), relative to the first fetched frame's `original_offset`.
    pub slice_end_in_combined: u64,
}
325
/// v0.9 #106: encrypted partial-fetch plan for the SSE-S4 chunked
/// (S4E6) Range GET fast-path. Given a [`RangePlan`] (which describes
/// the *pre-encrypt* byte range to fetch) plus the per-object SSE
/// binding, computes the actual encrypted byte range to send the
/// backend, the S4E6 chunk index range to walk, and where the
/// pre-encrypt byte range lands inside the decrypted chunk
/// concatenation.
///
/// Caller workflow:
///
/// 1. backend partial GET `body[enc_byte_start..enc_byte_end_exclusive)`
///    plus the S4E6 fixed header (already cached in the sidecar's
///    `SseChunkBinding`, no extra fetch).
/// 2. for `chunk_idx in chunk_idx_start..=chunk_idx_last_inclusive`,
///    `decrypt_chunk(chunk_idx, &body[..])` — the sidecar's salt +
///    key_id provide the AAD / nonce material.
/// 3. concatenate the decrypted plaintext, slice off
///    `pre_encrypt_slice_start_in_concat..pre_encrypt_slice_end_in_concat`
///    to land at the [`RangePlan`]'s `byte_start..byte_end_exclusive`
///    (= pre-encrypt) range.
/// 4. frame-parse + decompress + final slice via the existing
///    [`RangePlan`] machinery.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EncryptedRangePlan {
    /// First S4E6 chunk that overlaps the requested pre-encrypt range
    /// (inclusive). Equals `RangePlan.byte_start / enc_chunk_size`.
    pub chunk_idx_start: u32,
    /// Last S4E6 chunk that overlaps the requested pre-encrypt range
    /// (inclusive). Equals `(RangePlan.byte_end_exclusive - 1) /
    /// enc_chunk_size`.
    pub chunk_idx_last_inclusive: u32,
    /// Byte offset within the **encrypted** backend body where the
    /// fetch starts (covers chunk `chunk_idx_start`'s tag + ciphertext).
    pub enc_byte_start: u64,
    /// Byte offset (exclusive) within the encrypted backend body where
    /// the fetch ends (covers through chunk `chunk_idx_last_inclusive`).
    pub enc_byte_end_exclusive: u64,
    /// Offset within the decrypted-chunk concatenation where the
    /// pre-encrypt slice starts (= `RangePlan.byte_start - chunk_idx_start *
    /// enc_chunk_size`).
    pub pre_encrypt_slice_start_in_concat: u64,
    /// Offset within the decrypted-chunk concatenation where the
    /// pre-encrypt slice ends (exclusive; = `RangePlan.byte_end_exclusive -
    /// chunk_idx_start * enc_chunk_size`).
    pub pre_encrypt_slice_end_in_concat: u64,
}
370
371impl SseChunkBinding {
372    /// Per-chunk on-disk byte cost: ciphertext (= plaintext bytes,
373    /// AES-GCM is CTR-mode) + 16-byte auth tag. Final chunk may carry
374    /// fewer plaintext bytes; this helper returns the *non-final*
375    /// chunk cost.
376    pub fn enc_chunk_stride(&self) -> u64 {
377        self.enc_chunk_size as u64 + 16
378    }
379
380    /// On-disk byte length of chunk `chunk_idx`. Non-final chunks
381    /// carry `enc_chunk_size` plaintext bytes (+ 16-byte tag); the
382    /// final chunk carries `enc_plaintext_len - (chunk_count - 1) *
383    /// enc_chunk_size` plaintext bytes (+ tag).
384    pub fn enc_chunk_on_disk_size(&self, chunk_idx: u32) -> u64 {
385        if chunk_idx + 1 < self.enc_chunk_count {
386            self.enc_chunk_stride()
387        } else {
388            // Final chunk: total plaintext minus the chunks before it.
389            let prior = (chunk_idx as u64).saturating_mul(self.enc_chunk_size as u64);
390            let final_pt = self.enc_plaintext_len.saturating_sub(prior);
391            final_pt + 16
392        }
393    }
394
395    /// Encrypted-body byte offset of the *start* of chunk `chunk_idx`
396    /// (= the chunk's tag byte). Non-final chunks stride at
397    /// `enc_chunk_stride()`.
398    pub fn enc_chunk_byte_offset(&self, chunk_idx: u32) -> u64 {
399        self.enc_header_bytes as u64 + (chunk_idx as u64).saturating_mul(self.enc_chunk_stride())
400    }
401}
402
403impl FrameIndex {
404    /// v0.9 #106: extend a [`RangePlan`] (pre-encrypt byte range) to
405    /// an [`EncryptedRangePlan`] that names the actual encrypted
406    /// chunks to fetch + decrypt. Returns `None` if the index lacks
407    /// an [`SseChunkBinding`] (= non-SSE / v1 / v2 sidecar), or if
408    /// the [`RangePlan`]'s pre-encrypt range falls outside the SSE
409    /// binding's declared plaintext length (= sidecar / body
410    /// mismatch — caller should fall back to the buffered path).
411    pub fn encrypted_lookup(&self, plan: &RangePlan) -> Option<EncryptedRangePlan> {
412        let sse = self.sse_v3.as_ref()?;
413        if sse.enc_chunk_size == 0 || sse.enc_chunk_count == 0 {
414            return None;
415        }
416        if plan.byte_end_exclusive > sse.enc_plaintext_len
417            || plan.byte_start >= plan.byte_end_exclusive
418        {
419            return None;
420        }
421        let chunk_size = sse.enc_chunk_size as u64;
422        let chunk_idx_start_u64 = plan.byte_start / chunk_size;
423        let chunk_idx_last_u64 = (plan.byte_end_exclusive - 1) / chunk_size;
424        // Belt-and-braces: refuse any chunk index outside the declared
425        // chunk_count window. Either the sidecar is internally
426        // inconsistent or `compressed_offset` exceeded the SSE
427        // plaintext length (caught above) — both mean the sidecar is
428        // not trustworthy for this Range, so the GET path should fall
429        // back to the buffered full read.
430        if chunk_idx_last_u64 >= sse.enc_chunk_count as u64 {
431            return None;
432        }
433        let chunk_idx_start = chunk_idx_start_u64 as u32;
434        let chunk_idx_last_inclusive = chunk_idx_last_u64 as u32;
435        let enc_byte_start = sse.enc_chunk_byte_offset(chunk_idx_start);
436        let enc_byte_end_exclusive = sse.enc_chunk_byte_offset(chunk_idx_last_inclusive)
437            + sse.enc_chunk_on_disk_size(chunk_idx_last_inclusive);
438        let pre_encrypt_slice_start_in_concat =
439            plan.byte_start - (chunk_idx_start as u64) * chunk_size;
440        let pre_encrypt_slice_end_in_concat =
441            plan.byte_end_exclusive - (chunk_idx_start as u64) * chunk_size;
442        Some(EncryptedRangePlan {
443            chunk_idx_start,
444            chunk_idx_last_inclusive,
445            enc_byte_start,
446            enc_byte_end_exclusive,
447            pre_encrypt_slice_start_in_concat,
448            pre_encrypt_slice_end_in_concat,
449        })
450    }
451}
452
/// Reasons `decode_index` rejects a sidecar buffer.
///
/// v1.0 stability: `#[non_exhaustive]` — new validation guards may be
/// added in minor releases. Downstream callers must include a `_ =>`
/// arm when matching on this enum.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum IndexError {
    /// The buffer ends before a field the header requires (the fixed v1
    /// header, the v2 tail, the etag payload or the v3 SSE block). The
    /// payload is the number of bytes that were available at the point
    /// of the check.
    #[error("index too short: {0} bytes")]
    TooShort(usize),
    /// The first four bytes are not `INDEX_MAGIC`; `got` holds the bytes
    /// that were found instead.
    #[error("bad index magic: {got:?}")]
    BadMagic { got: [u8; 4] },
    /// The version field is none of the versions `decode_index`
    /// dispatches on (1, 2 or 3); the payload is the version found.
    #[error("unsupported index version {0} (this build supports {INDEX_VERSION})")]
    UnsupportedVersion(u32),
    /// The bytes left after the header do not equal the declared frame
    /// count times `ENTRY_BYTES`. `claimed` is presumably the header's
    /// frame count and `remaining` the leftover buffer length — the
    /// construction site is outside this excerpt, confirm there.
    #[error("entry count {claimed} doesn't match buffer remaining {remaining}")]
    EntryCountMismatch { claimed: u64, remaining: usize },
    /// v0.8.15 H-a: an entry's `original_offset + original_size` or
    /// `compressed_offset + compressed_size` overflows `u64`. The
    /// downstream `binary_search_by` / `lookup_range` machinery
    /// assumes monotonically-increasing offsets — overflow would let
    /// a forged sidecar drive the range planner into garbage state.
    #[error(
        "frame index entry overflows: original_offset={ooff}, original_size={osize}, \
         compressed_offset={coff}, compressed_size={csize}"
    )]
    EntryOverflow {
        ooff: u64,
        osize: u64,
        coff: u64,
        csize: u64,
    },
    /// v0.8.15 H-c: per-sidecar entry-count cap. Pairs with the v0.8.12
    /// `#124` `Vec::with_capacity` clamp — refuses pathologically-large
    /// `n` at parse time even before the `expected_remaining == input.len()`
    /// guard, so a 32-bit target can't be tricked into running `0..n`
    /// past the buffer.
    #[error("frame index entry count {got} exceeds MAX_FRAMES={max}")]
    TooManyFrames { got: u64, max: u64 },
    /// v0.8.15 H-c: `etag_len` exceeds the maximum addressable size on
    /// this target (32-bit) or the operator-configured cap.
    #[error("sidecar etag_len {got} exceeds MAX_ETAG_BYTES={max}")]
    EtagTooLong { got: u32, max: u32 },
    /// v0.8.16 F-2: consecutive entries are not in non-decreasing
    /// order. `binary_search_by` / `lookup_range` rely on the
    /// invariant that `entries[i+1].original_offset >=
    /// entries[i].original_end()` (and the same for `compressed_*`).
    /// A forged sidecar violating that lets a Range GET drive
    /// `RangePlan.byte_end_exclusive` to a u64-wrapped value.
    #[error(
        "frame index entries out of order: prev_original_end={prev_original_end}, \
         curr_original_offset={curr_original_offset}, prev_compressed_end={prev_compressed_end}, \
         curr_compressed_offset={curr_compressed_offset}"
    )]
    NonMonotonicEntries {
        prev_original_end: u64,
        curr_original_offset: u64,
        prev_compressed_end: u64,
        curr_compressed_offset: u64,
    },
}
511
/// v0.8.15 H-c: hard upper bound on the number of entries
/// [`decode_index`] will accept. 16 M × 32 B = 512 MiB sidecar
/// body — orders of magnitude over any real workload (a typical
/// 5 GiB object hits ~1280 frames at the 4 MiB default chunk).
/// Above this we'd be parsing an attacker payload, not a legitimate
/// sidecar. The check runs before the frame count is cast to `usize`,
/// so the cast cannot truncate on 32-bit targets.
pub const MAX_FRAMES: u64 = 16 * 1024 * 1024;
/// v0.8.15 H-c: hard upper bound on the etag-length field. AWS S3
/// ETags are ≤ 64 bytes including quotes; MinIO / Garage match. The
/// 4 KiB cap leaves room for non-canonical multipart ETags
/// (`<hex>-<n>`) without admitting attacker-controlled payloads.
/// [`decode_index`] rejects a larger `etag_len` with `EtagTooLong`.
pub const MAX_ETAG_BYTES: u32 = 4096;
524
525/// v0.8.4 #73 H-2: emit the v2 layout (with `source_etag` /
526/// `source_compressed_size`). Pre-v0.8.4 deployments that PUT under v1 are
527/// still readable (decode_index dispatches on the version field) — only the
528/// writer path is bumped here.
529///
530/// v0.9 #106: when `idx.sse_v3` is `Some(..)` (= source object was
531/// SSE-S4 chunked / S4E6), emit v3 instead — same v2 layout plus a
532/// trailing 30-byte SSE chunk-geometry block before the entries
533/// table. v0.8.x readers ignore unknown versions (`UnsupportedVersion(3)`)
534/// → sidecar is treated as missing → Range GET falls back to the
535/// existing buffered fallback, so v3 is forward-safe.
536pub fn encode_index(idx: &FrameIndex) -> Bytes {
537    let etag_bytes = idx.source_etag.as_deref().unwrap_or("").as_bytes();
538    let (version, fixed_header) = if idx.sse_v3.is_some() {
539        (INDEX_VERSION, HEADER_FIXED_V2 + SSE_BLOCK_V3)
540    } else {
541        (INDEX_VERSION_V2, HEADER_FIXED_V2)
542    };
543    let mut buf =
544        BytesMut::with_capacity(fixed_header + etag_bytes.len() + idx.entries.len() * ENTRY_BYTES);
545    buf.put_slice(INDEX_MAGIC);
546    buf.put_u32_le(version);
547    buf.put_u64_le(idx.entries.len() as u64);
548    buf.put_u64_le(idx.total_original_size());
549    buf.put_u64_le(idx.total_padded_size);
550    // v2 additions
551    buf.put_u64_le(idx.source_compressed_size.unwrap_or(0));
552    buf.put_u32_le(etag_bytes.len() as u32);
553    buf.put_slice(etag_bytes);
554    // v3 SSE block, only when an SSE binding is present.
555    if let Some(sse) = idx.sse_v3.as_ref() {
556        buf.put_u32_le(sse.enc_chunk_size);
557        buf.put_u32_le(sse.enc_chunk_count);
558        buf.put_u16_le(sse.enc_key_id);
559        buf.put_slice(&sse.enc_salt);
560        buf.put_u64_le(sse.enc_plaintext_len);
561        buf.put_u32_le(sse.enc_header_bytes);
562    }
563    for e in &idx.entries {
564        buf.put_u64_le(e.original_offset);
565        buf.put_u64_le(e.original_size);
566        buf.put_u64_le(e.compressed_offset);
567        buf.put_u64_le(e.compressed_size);
568    }
569    buf.freeze()
570}
571
/// v0.8.4 #73 H-2: legacy v1 encoder, kept only so the back-compat unit
/// test (`sidecar_header_back_compat_old_format_no_source_etag`) can
/// synthesize a v1 buffer and prove `decode_index` still parses it.
/// Production callers go through [`encode_index`].
///
/// v1.0 F3: `#[cfg(test)]`-gated so it never appears in the v1.0 public API
/// contract. Only the same-file `#[cfg(test)] mod tests` consumes it; no
/// integration test, fuzz target, or bench reaches it.
///
/// The v1 layout is the 32-byte fixed header followed directly by the
/// entries table; `source_etag`, `source_compressed_size` and `sse_v3`
/// are not written.
#[cfg(test)]
pub(crate) fn encode_index_v1_for_test(idx: &FrameIndex) -> Bytes {
    let entry_count = idx.entries.len();
    let mut out = BytesMut::with_capacity(HEADER_FIXED_V1 + entry_count * ENTRY_BYTES);

    // Fixed v1 header.
    out.put_slice(INDEX_MAGIC);
    out.put_u32_le(INDEX_VERSION_V1);
    out.put_u64_le(entry_count as u64);
    out.put_u64_le(idx.total_original_size());
    out.put_u64_le(idx.total_padded_size);

    // Entries table: four little-endian u64 words per frame.
    for frame in &idx.entries {
        for word in [
            frame.original_offset,
            frame.original_size,
            frame.compressed_offset,
            frame.compressed_size,
        ] {
            out.put_u64_le(word);
        }
    }
    out.freeze()
}
596
597pub fn decode_index(mut input: Bytes) -> Result<FrameIndex, IndexError> {
598    if input.len() < HEADER_FIXED_V1 {
599        return Err(IndexError::TooShort(input.len()));
600    }
601    let mut magic = [0u8; 4];
602    magic.copy_from_slice(&input[..4]);
603    if &magic != INDEX_MAGIC {
604        return Err(IndexError::BadMagic { got: magic });
605    }
606    input.advance(4);
607    let version = input.get_u32_le();
608    let n = input.get_u64_le();
609    let _total_original = input.get_u64_le();
610    let total_padded_size = input.get_u64_le();
611    // v0.8.15 H-c: hard cap on `n` *before* any size arithmetic. The
612    // existing `expected_remaining == input.len()` check is a
613    // necessary condition but not sufficient — on a 32-bit target,
614    // `n as usize` truncates a 33-bit value and the buffer check
615    // would silently pass with the wrong loop count. Reject early.
616    if n > MAX_FRAMES {
617        return Err(IndexError::TooManyFrames {
618            got: n,
619            max: MAX_FRAMES,
620        });
621    }
622    // Dispatch on version. v1 jumps straight to the entry table; v2
623    // reads the additional fixed fields + variable-length etag before
624    // the entries; v3 reads the v2 layout plus the SSE chunk-geometry
625    // block.
626    let (source_compressed_size, source_etag, sse_v3) = match version {
627        v if v == INDEX_VERSION_V1 => (None, None, None),
628        v if v == INDEX_VERSION_V2 || v == INDEX_VERSION => {
629            // v2 fixed-header tail: source_compressed_size (u64) + etag_len (u32).
630            if input.len() < 8 + 4 {
631                return Err(IndexError::TooShort(input.len()));
632            }
633            let scs = input.get_u64_le();
634            let etag_len_u32 = input.get_u32_le();
635            // v0.8.15 H-c: bound `etag_len` *before* the `as usize`
636            // cast so the buffer check on a 32-bit WASM target can't
637            // be tricked into a usize-truncated value.
638            if etag_len_u32 > MAX_ETAG_BYTES {
639                return Err(IndexError::EtagTooLong {
640                    got: etag_len_u32,
641                    max: MAX_ETAG_BYTES,
642                });
643            }
644            let etag_len = etag_len_u32 as usize;
645            if input.len() < etag_len {
646                return Err(IndexError::TooShort(input.len()));
647            }
648            // Slice off the etag bytes; treat decode failure as "no etag" so
649            // a corrupted etag field still leaves a usable index (the GET
650            // path will fall back to full read on the missing binding).
651            let etag_bytes = input.split_to(etag_len);
652            let etag = if etag_len == 0 {
653                None
654            } else {
655                std::str::from_utf8(&etag_bytes).ok().map(str::to_owned)
656            };
657            // v0.9 #106: v3 appends the 30-byte SSE chunk-geometry
658            // block after the etag payload (before the entries table).
659            // A v3 sidecar with `enc_chunk_size == 0` is treated as
660            // "no SSE binding" (= equivalent to v2 semantics) so the
661            // writer side has a forward-safe encoding for the
662            // non-SSE-S4-chunked path if it ever needs to bump
663            // version without populating the SSE binding.
664            let sse_binding = if v == INDEX_VERSION {
665                if input.len() < SSE_BLOCK_V3 {
666                    return Err(IndexError::TooShort(input.len()));
667                }
668                let enc_chunk_size = input.get_u32_le();
669                let enc_chunk_count = input.get_u32_le();
670                let enc_key_id = input.get_u16_le();
671                let mut enc_salt = [0u8; 8];
672                input.copy_to_slice(&mut enc_salt);
673                let enc_plaintext_len = input.get_u64_le();
674                let enc_header_bytes = input.get_u32_le();
675                if enc_chunk_size == 0 || enc_chunk_count == 0 {
676                    None
677                } else {
678                    Some(SseChunkBinding {
679                        enc_chunk_size,
680                        enc_chunk_count,
681                        enc_key_id,
682                        enc_salt,
683                        enc_plaintext_len,
684                        enc_header_bytes,
685                    })
686                }
687            } else {
688                None
689            };
690            (if scs == 0 { None } else { Some(scs) }, etag, sse_binding)
691        }
692        other => return Err(IndexError::UnsupportedVersion(other)),
693    };
694    // v0.8.15 H-c: `n * ENTRY_BYTES` cannot overflow `usize` here
695    // because `n <= MAX_FRAMES = 16M` and `ENTRY_BYTES = 32`, and on
696    // 32-bit targets the resulting value fits in `usize` (≤ 512
697    // MiB). The `as usize` cast on `n` is now bounded by the same
698    // ceiling.
699    let expected_remaining = (n as usize).saturating_mul(ENTRY_BYTES);
700    if input.len() != expected_remaining {
701        return Err(IndexError::EntryCountMismatch {
702            claimed: n,
703            remaining: input.len(),
704        });
705    }
706    // v0.8.12 HIGH-14 fix: clamp the initial allocation the way the
707    // CpuZstd / CpuGzip decompress path does (see
708    // `DECOMPRESS_BOOTSTRAP_CAPACITY` in `lib.rs`, landed in #89).
709    // A forged sidecar with `n = 100_000_000` paired with a 3.2 GiB
710    // body (the only way the `expected_remaining` check above passes
711    // for that `n`) would otherwise commit ~3.2 GiB of `FrameIndexEntry`
712    // slots up front, on top of the 3.2 GiB body bytes already in
713    // RAM. The honest cap is 4096 entries (128 KiB at
714    // `ENTRY_BYTES = 32`) — large enough that single-PUT framed and
715    // typical multipart objects don't pay any growth cost, small
716    // enough that an adversarial sidecar can't drive multi-GiB
717    // pre-allocations behind the bounded `expected_remaining`
718    // check. The `push` loop below grows the vector naturally and
719    // is itself bounded by `expected_remaining == input.len()`.
720    const BOOTSTRAP_ENTRIES: usize = 4096;
721    let initial_cap = (n as usize).min(BOOTSTRAP_ENTRIES);
722    let mut entries = Vec::with_capacity(initial_cap);
723    for _ in 0..n {
724        let original_offset = input.get_u64_le();
725        let original_size = input.get_u64_le();
726        let compressed_offset = input.get_u64_le();
727        let compressed_size = input.get_u64_le();
728        // v0.8.15 H-a: refuse entries whose `offset + size` overflows
729        // `u64`. The downstream `binary_search_by` / `lookup_range`
730        // machinery relies on monotone offsets — a wrapped value
731        // would let a forged sidecar drive `RangePlan.byte_end_exclusive`
732        // to garbage.
733        if original_offset.checked_add(original_size).is_none()
734            || compressed_offset.checked_add(compressed_size).is_none()
735        {
736            return Err(IndexError::EntryOverflow {
737                ooff: original_offset,
738                osize: original_size,
739                coff: compressed_offset,
740                csize: compressed_size,
741            });
742        }
743        entries.push(FrameIndexEntry {
744            original_offset,
745            original_size,
746            compressed_offset,
747            compressed_size,
748        });
749    }
750    // v0.8.16 F-2: inter-entry monotonicity. v0.8.15 H-a closed the
751    // per-entry `offset + size` overflow but did NOT verify that
752    // entries are in non-decreasing order. The downstream
753    // `binary_search_by` in `lookup_range` assumes sorted entries
754    // — feed it a sidecar with `[ooff=100,...],[ooff=0,...]` and the
755    // partition point logic returns garbage, then `start - entries[
756    // first_idx].original_offset` underflows `u64` (wraps in
757    // release, panics in dev) and the resulting `RangePlan` drives
758    // an arbitrary backend GET range. Reject out-of-order entries
759    // here with a dedicated typed error.
760    for win in entries.windows(2) {
761        let prev = &win[0];
762        let curr = &win[1];
763        if curr.original_offset < prev.original_end()
764            || curr.compressed_offset < prev.compressed_end()
765        {
766            return Err(IndexError::NonMonotonicEntries {
767                prev_original_end: prev.original_end(),
768                curr_original_offset: curr.original_offset,
769                prev_compressed_end: prev.compressed_end(),
770                curr_compressed_offset: curr.compressed_offset,
771            });
772        }
773    }
774    Ok(FrameIndex {
775        total_padded_size,
776        entries,
777        source_etag,
778        source_compressed_size,
779        sse_v3,
780    })
781}
782
783/// Object body の bytes 全体を scan して FrameIndex を構築する。
784/// `multipart_e2e.rs` 等で full-scan path として使用。
785pub fn build_index_from_body(body: &Bytes) -> Result<FrameIndex, crate::multipart::FrameError> {
786    let mut entries = Vec::new();
787    let mut original_off: u64 = 0;
788    // FrameIter は padding を skip してしまうので、自前で位置追跡しながら parse する
789    let mut cursor = 0usize;
790    let mut iter_buf = body.clone();
791    while cursor < body.len() {
792        // padding magic を skip
793        if cursor + 4 <= body.len() && &body[cursor..cursor + 4] == crate::multipart::PADDING_MAGIC
794        {
795            // PADDING_HEADER_BYTES = 4 magic + 8 length
796            if cursor + crate::multipart::PADDING_HEADER_BYTES > body.len() {
797                break;
798            }
799            let pad_len = u64::from_le_bytes(body[cursor + 4..cursor + 12].try_into().unwrap());
800            // v0.8.16 F-3: was `pad_len as usize`, silently
801            // truncating on 32-bit. A forged `S4P1 || u64::MAX`
802            // padding header advanced the cursor by `0xFFFF_FFFF`
803            // on 64-bit (skipping past `body.len()` into the next
804            // iteration's break) and by `0xFFFF_FFFF` truncated
805            // on 32-bit (different behaviour by target). Use
806            // try_from + checked_add so a malformed body fails
807            // closed with a typed `FrameError` instead of either
808            // wandering off the end of the buffer or silently
809            // skipping the bad frame.
810            let pad_len_usize = usize::try_from(pad_len)
811                .map_err(|_| crate::multipart::FrameError::PayloadTooLarge(pad_len))?;
812            let next_cursor = cursor
813                .checked_add(crate::multipart::PADDING_HEADER_BYTES)
814                .and_then(|n| n.checked_add(pad_len_usize))
815                .ok_or(crate::multipart::FrameError::PayloadTooLarge(pad_len))?;
816            cursor = next_cursor;
817            if cursor > body.len() {
818                break;
819            }
820            iter_buf = body.slice(cursor..);
821            continue;
822        }
823        // data frame
824        if cursor + crate::multipart::FRAME_HEADER_BYTES > body.len() {
825            break;
826        }
827        let (header, _payload, rest) = crate::multipart::read_frame(iter_buf.clone())?;
828        // v0.8.16 F-3: `header.compressed_size as usize` had the
829        // same 32-bit-truncation hazard as the padding cursor
830        // arithmetic above. Use try_from so a forged 4 GiB+ frame
831        // surfaces as `PayloadTooLarge` instead of wandering off.
832        let compressed_size_usize = usize::try_from(header.compressed_size)
833            .map_err(|_| crate::multipart::FrameError::PayloadTooLarge(header.compressed_size))?;
834        let frame_total = crate::multipart::FRAME_HEADER_BYTES
835            .checked_add(compressed_size_usize)
836            .ok_or(crate::multipart::FrameError::PayloadTooLarge(
837                header.compressed_size,
838            ))?;
839        entries.push(FrameIndexEntry {
840            original_offset: original_off,
841            original_size: header.original_size,
842            compressed_offset: cursor as u64,
843            compressed_size: frame_total as u64,
844        });
845        // v0.8.16 F-3: `original_off +=` was a plain `+`, panicking
846        // in dev / wrapping in release on a forged body whose
847        // cumulative original sizes overflow u64. Use checked_add
848        // → typed error.
849        original_off = original_off.checked_add(header.original_size).ok_or(
850            crate::multipart::FrameError::PayloadTooLarge(header.original_size),
851        )?;
852        cursor = cursor.checked_add(frame_total).ok_or(
853            crate::multipart::FrameError::PayloadTooLarge(header.compressed_size),
854        )?;
855        iter_buf = rest;
856    }
857    Ok(FrameIndex {
858        total_padded_size: body.len() as u64,
859        entries,
860        // The caller (s4-server `put_object`) stamps the version-binding
861        // fields after the backend PUT returns the authoritative ETag —
862        // build_index_from_body itself only sees the post-compress bytes
863        // and cannot fabricate a server-blessed ETag.
864        source_etag: None,
865        source_compressed_size: None,
866        // v0.9 #106: SSE binding is also stamped by the caller after the
867        // S4E6 encrypt path runs (`build_index_from_body` only sees the
868        // pre-encrypt compressed body and cannot know the salt / key_id).
869        sse_v3: None,
870    })
871}
872
/// Suffix appended to an object key to form its sidecar index key.
///
/// Public so that code outside this module which needs to recognise
/// sidecar objects (reserved-name guards, list filters) can share this
/// single definition instead of repeating the literal.
pub const SIDECAR_SUFFIX: &str = ".s4index";

/// Returns the sidecar key for `object_key`, i.e. the key with
/// [`SIDECAR_SUFFIX`] appended.
pub fn sidecar_key(object_key: &str) -> String {
    [object_key, SIDECAR_SUFFIX].concat()
}

/// Reports whether `object_key` is a reserved sidecar name, i.e. whether it
/// ends with [`SIDECAR_SUFFIX`].
///
/// Intended for rejecting user-supplied keys that would collide with the
/// sidecar objects written alongside regular objects.
pub fn is_reserved_sidecar_key(object_key: &str) -> bool {
    object_key.strip_suffix(SIDECAR_SUFFIX).is_some()
}
894
#[cfg(test)]
mod tests {
    use super::*;
    use crate::CodecKind;
    use crate::multipart::{FrameHeader, pad_to_minimum, write_frame};

    /// Shorthand constructor for a single index entry.
    fn entry(
        original_offset: u64,
        original_size: u64,
        compressed_offset: u64,
        compressed_size: u64,
    ) -> FrameIndexEntry {
        FrameIndexEntry {
            original_offset,
            original_size,
            compressed_offset,
            compressed_size,
        }
    }

    /// Three-frame fixture with no version binding and no SSE binding.
    ///
    /// Original ranges are 0..100, 100..180 and 180..230; the second frame
    /// sits 10 bytes after the end of the first (a padding gap).
    fn sample_index() -> FrameIndex {
        FrameIndex {
            total_padded_size: 200,
            entries: vec![
                entry(0, 100, 0, 50),
                entry(100, 80, 60, 40), // 10-byte gap before this frame = padding
                entry(180, 50, 100, 30),
            ],
            // Unset by default so the fixture serves the lookup / round-trip /
            // build-from-body tests that do not care about version binding.
            source_etag: None,
            source_compressed_size: None,
            // Unset by default so encoding this fixture takes the v2 path;
            // the v2 round-trip test below asserts that version.
            sse_v3: None,
        }
    }

    /// Reads the little-endian `version` field stored at bytes 4..8 of an
    /// encoded sidecar.
    fn wire_version(encoded: &[u8]) -> u32 {
        let mut raw = [0u8; 4];
        raw.copy_from_slice(&encoded[4..8]);
        u32::from_le_bytes(raw)
    }

    /// Index that carries only an SSE binding: chunk size 1024, chunk count
    /// 4, key id 1, all-zero salt, 24 header bytes, and the given plaintext
    /// length.
    fn sse_only_index(enc_plaintext_len: u64) -> FrameIndex {
        FrameIndex {
            total_padded_size: 0,
            entries: Vec::new(),
            source_etag: None,
            source_compressed_size: None,
            sse_v3: Some(SseChunkBinding {
                enc_chunk_size: 1024,
                enc_chunk_count: 4,
                enc_key_id: 1,
                enc_salt: [0u8; 8],
                enc_plaintext_len,
                enc_header_bytes: 24,
            }),
        }
    }

    /// Base `RangePlan` pinned to frame 0 with every bound zeroed. Tests
    /// override the fields they care about with struct-update syntax.
    fn frame0_plan() -> RangePlan {
        RangePlan {
            first_frame_idx: 0,
            last_frame_idx_inclusive: 0,
            byte_start: 0,
            byte_end_exclusive: 0,
            slice_start_in_combined: 0,
            slice_end_in_combined: 0,
        }
    }

    /// Appends one passthrough frame with the given payload, declaring
    /// `original_size` original bytes and a zero CRC.
    fn push_passthrough_frame(buf: &mut BytesMut, original_size: u64, payload: &'static [u8]) {
        let payload = Bytes::from_static(payload);
        write_frame(
            &mut *buf,
            FrameHeader {
                codec: CodecKind::Passthrough,
                original_size,
                compressed_size: payload.len() as u64,
                crc32c: 0,
            },
            &payload,
        );
    }

    /// Encoding the plain fixture and decoding it again yields an equal index.
    #[test]
    fn encode_decode_roundtrip() {
        let original = sample_index();
        let decoded = decode_index(encode_index(&original)).unwrap();
        assert_eq!(decoded, original);
    }

    /// v0.8.4 #73 H-2: round-trip with `source_etag` and
    /// `source_compressed_size` populated.
    ///
    /// v0.9 #106: with `sse_v3 = None` the encoded version field must be v2.
    #[test]
    fn encode_decode_roundtrip_v2_with_source_binding() {
        let mut original = sample_index();
        original.source_etag = Some(String::from("\"deadbeefcafe\""));
        original.source_compressed_size = Some(987_654);

        let encoded = encode_index(&original);
        assert_eq!(&encoded[..4], INDEX_MAGIC);
        assert_eq!(
            wire_version(&encoded),
            INDEX_VERSION_V2,
            "writer must emit v2 when no SSE binding is attached"
        );

        let decoded = decode_index(encoded).unwrap();
        assert_eq!(decoded, original);
    }

    /// v0.9 #106: round-trip with an SSE chunk binding attached. The
    /// encoded version field must be the current version and decoding must
    /// restore an equal index, binding included.
    #[test]
    fn encode_decode_roundtrip_v3_with_sse_binding() {
        let mut original = sample_index();
        original.source_etag = Some(String::from("\"abc123\""));
        original.source_compressed_size = Some(2048);
        original.sse_v3 = Some(SseChunkBinding {
            enc_chunk_size: 1024,
            enc_chunk_count: 2,
            enc_key_id: 7,
            enc_salt: [0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88],
            enc_plaintext_len: 2048,
            enc_header_bytes: 24,
        });

        let encoded = encode_index(&original);
        assert_eq!(
            wire_version(&encoded),
            INDEX_VERSION,
            "writer must emit v3 when SSE binding is attached"
        );

        let decoded = decode_index(encoded).unwrap();
        assert_eq!(decoded, original);
        assert!(decoded.sse_v3.is_some());
    }

    /// v0.9 #106: a current-version sidecar whose SSE block is all zero
    /// (so `enc_chunk_size == 0`) decodes with `sse_v3 = None`.
    #[test]
    fn v3_with_zero_enc_chunk_size_decodes_as_no_sse() {
        let mut raw = BytesMut::new();
        raw.put_slice(INDEX_MAGIC);
        raw.put_u32_le(INDEX_VERSION); // 3
        // Zeroed remainder of the header: entry count, total_original,
        // total_padded, source_compressed_size (u64 each) + etag_len (u32).
        raw.put_slice(&[0u8; 4 * 8 + 4]);
        // Zeroed SSE block: enc_chunk_size (u32), enc_chunk_count (u32),
        // enc_key_id (u16), enc_salt (8 bytes), enc_plaintext_len (u64),
        // enc_header_bytes (u32).
        raw.put_slice(&[0u8; 4 + 4 + 2 + 8 + 8 + 4]);

        let decoded = decode_index(raw.freeze()).unwrap();
        assert!(decoded.sse_v3.is_none());
    }

    /// v0.9 #106: back-compat. An index without an SSE binding is encoded
    /// (the test checks the emitted version field is v2) and then decoded;
    /// the result must carry no SSE binding and the original etag.
    #[test]
    fn v2_sidecar_decoded_by_v3_reader_with_no_sse_binding() {
        let mut original = sample_index();
        original.source_etag = Some(String::from("\"v2-only\""));
        original.source_compressed_size = Some(123);

        let encoded = encode_index(&original); // sse_v3 = None, so v2 is expected
        assert_eq!(wire_version(&encoded), INDEX_VERSION_V2);

        let decoded = decode_index(encoded).unwrap();
        assert!(decoded.sse_v3.is_none());
        assert_eq!(decoded.source_etag.as_deref(), Some("\"v2-only\""));
    }

    /// v0.9 #106: `encrypted_lookup` for a range that stays inside one
    /// chunk. The resulting encrypted byte range must cover exactly chunk 0.
    #[test]
    fn encrypted_lookup_single_chunk() {
        let idx = sse_only_index(4096);
        let plan = RangePlan {
            byte_start: 100,
            byte_end_exclusive: 500,
            slice_end_in_combined: 400,
            ..frame0_plan()
        };

        let enc = idx.encrypted_lookup(&plan).unwrap();
        assert_eq!((enc.chunk_idx_start, enc.chunk_idx_last_inclusive), (0, 0));
        assert_eq!(enc.enc_byte_start, 24);
        // Non-final chunk: 1024 + 16 = 1040 bytes on disk.
        assert_eq!(enc.enc_byte_end_exclusive, 24 + 1040);
        assert_eq!(enc.pre_encrypt_slice_start_in_concat, 100);
        assert_eq!(enc.pre_encrypt_slice_end_in_concat, 500);
    }

    /// v0.9 #106: `encrypted_lookup` for a range that straddles chunks 0
    /// and 1. The resulting encrypted byte range must cover both chunks.
    #[test]
    fn encrypted_lookup_crossing_chunk_boundary() {
        let idx = sse_only_index(4096);
        let plan = RangePlan {
            byte_start: 900,          // inside chunk 0
            byte_end_exclusive: 1200, // inside chunk 1
            slice_end_in_combined: 300,
            ..frame0_plan()
        };

        let enc = idx.encrypted_lookup(&plan).unwrap();
        assert_eq!((enc.chunk_idx_start, enc.chunk_idx_last_inclusive), (0, 1));
        assert_eq!(enc.enc_byte_start, 24);
        assert_eq!(enc.enc_byte_end_exclusive, 24 + 2 * 1040);
        assert_eq!(enc.pre_encrypt_slice_start_in_concat, 900);
        assert_eq!(enc.pre_encrypt_slice_end_in_concat, 1200);
    }

    /// v0.9 #106: for the final (shorter) chunk, the on-disk size is
    /// derived from `enc_plaintext_len` rather than from the chunk stride.
    #[test]
    fn encrypted_lookup_final_chunk_uses_residual_size() {
        // Three full 1024-byte chunks plus a 500-byte final chunk:
        // 4 chunks, 3572 bytes of plaintext in total.
        let idx = sse_only_index(3572);
        let plan = RangePlan {
            byte_start: 3100,
            byte_end_exclusive: 3500,
            slice_end_in_combined: 400,
            ..frame0_plan()
        };

        let enc = idx.encrypted_lookup(&plan).unwrap();
        assert_eq!((enc.chunk_idx_start, enc.chunk_idx_last_inclusive), (3, 3));
        // Final chunk on disk: (3572 - 3 * 1024) + 16 = 500 + 16 = 516.
        let expected_start = 24 + 3 * 1040;
        assert_eq!(enc.enc_byte_start, expected_start);
        assert_eq!(enc.enc_byte_end_exclusive, expected_start + 516);
    }

    /// v0.9 #106: without an SSE binding `encrypted_lookup` returns `None`.
    #[test]
    fn encrypted_lookup_without_binding_returns_none() {
        let idx = sample_index();
        let plan = RangePlan {
            byte_end_exclusive: 10,
            slice_end_in_combined: 10,
            ..frame0_plan()
        };
        assert!(idx.encrypted_lookup(&plan).is_none());
    }

    /// v0.8.4 #73 H-2: raw v1 bytes (as a pre-v0.8.4 deployment would have
    /// written) must still decode, with `source_etag`,
    /// `source_compressed_size` and `sse_v3` all coming back as `None`.
    /// This pins the version dispatch in `decode_index` that keeps old
    /// sidecars usable.
    #[test]
    fn sidecar_header_back_compat_old_format_no_source_etag() {
        let mut v2_idx = sample_index();
        v2_idx.source_etag = Some(String::from("\"unused\""));
        v2_idx.source_compressed_size = Some(42);

        // Encode through the v1 test encoder. v1 has no slot for the
        // binding fields, so they are dropped on the way out.
        let v1_bytes = encode_index_v1_for_test(&v2_idx);
        // Sanity check: the version field on the wire really is v1.
        assert_eq!(wire_version(&v1_bytes), INDEX_VERSION_V1);

        let decoded = decode_index(v1_bytes).expect("v1 sidecar must still decode");
        // Entries and total size survive; the newer fields are absent.
        assert_eq!(decoded.entries, v2_idx.entries);
        assert_eq!(decoded.total_padded_size, v2_idx.total_padded_size);
        assert!(decoded.source_etag.is_none());
        assert!(decoded.source_compressed_size.is_none());
        assert!(decoded.sse_v3.is_none());
    }

    /// Original bytes [10, 50) fall entirely inside frame 0 (original 0..100).
    #[test]
    fn lookup_range_within_single_frame() {
        let idx = sample_index();
        let plan = idx.lookup_range(10, 50).unwrap();
        assert_eq!((plan.first_frame_idx, plan.last_frame_idx_inclusive), (0, 0));
        // The whole of frame 0 is fetched.
        assert_eq!((plan.byte_start, plan.byte_end_exclusive), (0, 50));
        assert_eq!(
            (plan.slice_start_in_combined, plan.slice_end_in_combined),
            (10, 50)
        );
    }

    /// Original bytes [50, 150) cover the back half of frame 0 and the
    /// front half of frame 1.
    #[test]
    fn lookup_range_spans_frames() {
        let idx = sample_index();
        let plan = idx.lookup_range(50, 150).unwrap();
        assert_eq!((plan.first_frame_idx, plan.last_frame_idx_inclusive), (0, 1));
        // Frame 0 occupies 0..50 and frame 1 occupies 60..100.
        assert_eq!((plan.byte_start, plan.byte_end_exclusive), (0, 100));
        assert_eq!(
            (plan.slice_start_in_combined, plan.slice_end_in_combined),
            (50, 150)
        );
    }

    /// Total original size is 100 + 80 + 50 = 230, so a request for
    /// 200..1000 is clamped to 200..230 and lands in frame 2 only.
    #[test]
    fn lookup_range_at_end_clamps() {
        let idx = sample_index();
        let plan = idx.lookup_range(200, 1000).unwrap();
        assert_eq!((plan.first_frame_idx, plan.last_frame_idx_inclusive), (2, 2));
        // Whole of frame 2: compressed_offset = 100, size = 30, i.e. 100..130.
        assert_eq!((plan.byte_start, plan.byte_end_exclusive), (100, 130));
    }

    /// A range that starts past the end of the object yields no plan.
    #[test]
    fn lookup_range_out_of_bounds_returns_none() {
        let idx = sample_index();
        assert!(idx.lookup_range(500, 600).is_none());
    }

    /// Builds a body of two frames with padding in between and checks that
    /// the scanned index records both frames at the right offsets.
    #[test]
    fn build_index_from_real_body_skips_padding() {
        let mut buf = BytesMut::new();
        push_passthrough_frame(&mut buf, 100, b"AAAA");
        let first_frame_len = buf.len();
        // Pad the buffer out to 5000 bytes.
        pad_to_minimum(&mut buf, 5000);
        let second_frame_start = buf.len();
        push_passthrough_frame(&mut buf, 80, b"BBBB");

        let idx = build_index_from_body(&buf.freeze()).unwrap();
        assert_eq!(idx.entries.len(), 2);

        let (first, second) = (&idx.entries[0], &idx.entries[1]);
        assert_eq!(first.original_offset, 0);
        assert_eq!(first.compressed_offset, 0);
        assert_eq!(first.original_size, 100);
        assert_eq!(first.compressed_size, first_frame_len as u64);
        assert_eq!(second.original_offset, 100);
        assert_eq!(second.compressed_offset, second_frame_start as u64);
        assert_eq!(second.original_size, 80);
        assert_eq!(idx.total_original_size(), 180);
    }
}
s4_codec/index.rs

s4_codec/
index.rs