s4_codec/index.rs
1//! Frame index — Range GET の partial fetch を可能にするための sidecar object 形式。
2//!
3//! ## 課題
4//!
5//! S4-multipart object は `[S4F2 frame]([S4P1 padding][S4F2 frame])*` のシーケンス。
6//! Range GET (e.g. `bytes=N-M`) を効率的に処理するには、(a) どの frame が
7//! decompressed offset N..M に対応しているか、(b) その frame は object body の
8//! どこ (compressed_offset) から始まるか、を知る必要がある。
9//!
10//! ## 解決策
11//!
12//! `<key>.s4index` という sidecar object に下記の binary index を書く:
13//!
14//! ```text
15//! ┌──── v1 32 byte header ─┐
16//! │ S4IX magic (4) │
17//! │ version u32 (4) │
18//! │ total_frames u64 (8) │
19//! │ total_original u64 (8) │
20//! │ total_padded u64 (8) │ ← S3 上の object サイズ (padding 含む)
21//! └────────────────────────┘
22//! 各 frame について 32 byte:
23//! original_offset u64 LE
24//! original_size u64 LE
25//! compressed_offset u64 LE ← S3 object body における frame header の開始位置
26//! compressed_size u64 LE ← header (28 byte) + payload の合計
27//! ```
28//!
29//! 1000 frame で 32 KB、10000 frame で 320 KB。10 万 frame でも 3.2 MB に収まる。
30//!
31//! ## 使い方
32//!
33//! - PUT: 1 frame の単純 index、PUT 完了後に sidecar 書込
34//! - CompleteMultipartUpload: object 全体を一度 fetch + scan して index を構築
35//! - Range GET: sidecar fetch → `lookup_range(start, end)` で frame 範囲 + S3 byte 範囲を取得
36//! → backend に partial Range GET → frame parse → decompress → slice
37//!
38//! ## v0.8.4 #73 H-2: source object version binding (v2 header)
39//!
40//! v1 では sidecar に source object の identity が無いため、object overwrite 後に
41//! sidecar が stale のままだと Range GET が **間違った frame** を返す危険があった
42//! (古い byte offset で新 object を partial GET する hazard)。攻撃者が backend を
43//! 直接触れる脅威モデルでは、偽 sidecar を仕込めば任意 frame を露呈させ得る。
44//!
45//! 対策として v2 header に `source_etag` と `source_compressed_size` を追加。GET
46//! 側は HEAD で current etag を取って一致確認 → 不一致なら sidecar を信用せず full
47//! GET path に fall back する。
48//!
49//! ```text
50//! ┌──── v2 header (variable) ┐
51//! │ S4IX magic (4) │
52//! │ version u32 (4) = 2 │
53//! │ total_frames u64 (8) │
54//! │ total_original u64 (8) │
55//! │ total_padded u64 (8) │
56//! │ source_compressed_size u64 (8) ← v2 で追加
57//! │ etag_len u32 (4) ← v2 で追加 (UTF-8 byte length, 0 = absent)
58//! │ etag bytes (etag_len) ← v2 で追加 (RFC 7232 entity-tag, quotes 含む)
59//! └──────────────────────────┘
60//! ```
61//!
62//! - **back-compat**: v1 sidecar が backend に既存していれば read-only で `decode_index`
63//! が `source_etag = None`, `source_compressed_size = None` で復元する。GET 側は
64//! `None` を見たら "legacy sidecar — verify skip, full GET にも fallback できる"
65//! と扱う (= 既存挙動保持)。
66//! - **新規 PUT**: 常に v2 を書く。`source_etag` は backend response の e_tag、
67//! `source_compressed_size` は put body 長 (= `total_padded_size`) が原則。
68
69use bytes::{Buf, BufMut, Bytes, BytesMut};
70use thiserror::Error;
71
/// Four-byte magic that opens every sidecar index buffer.
pub const INDEX_MAGIC: &[u8; 4] = b"S4IX";

/// Format version emitted by `encode_index`.
///
/// Version 2 extends the version 1 header with `source_compressed_size` (u64),
/// `etag_len` (u32) and the variable-length etag bytes. Version 1 buffers are
/// still accepted on the read side (`decode_index` dispatches on this field).
pub const INDEX_VERSION: u32 = 2;

/// Version tag of the legacy fixed-size header; kept for tests and back-compat readers.
pub const INDEX_VERSION_V1: u32 = 1;

/// Evaluates to 40.
///
/// NOTE(review): this equals neither the 32-byte v1 header (`HEADER_FIXED_V1`)
/// nor the 44-byte fixed part of the v2 header (`HEADER_FIXED_V2`), and the
/// codec functions visible in this file never read it. Confirm with external
/// callers what layout it is meant to describe before relying on it.
pub const INDEX_HEADER_BYTES: usize = 4 + 4 + 8 + 8 + 4 + 4 + 8;

/// Size of the v1 header: magic (4) + version (4) + total_frames (8)
/// + total_original (8) + total_padded (8) = 32 bytes.
const HEADER_FIXED_V1: usize = 4 + 4 + 3 * 8;

/// Size of the fixed part of the v2 header: the v1 header followed by
/// `source_compressed_size` (u64) and `etag_len` (u32). The etag bytes
/// themselves come right after and are not counted here.
const HEADER_FIXED_V2: usize = HEADER_FIXED_V1 + (8 + 4);

/// Encoded size of one frame entry: four little-endian `u64` fields.
pub const ENTRY_BYTES: usize = 4 * 8;
86
/// Position of one data frame, both in the decompressed stream and in the
/// stored object body.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FrameIndexEntry {
    /// Cumulative, 0-based offset in the decompressed stream at which this
    /// frame's data begins.
    pub original_offset: u64,
    /// Number of bytes after decompression (same value as the frame header's
    /// `original_size`).
    pub original_size: u64,
    /// Offset inside the S3 object body where the frame starts (the first byte
    /// of the `S4F2` magic).
    pub compressed_offset: u64,
    /// Size of the whole stored frame (28-byte header + payload).
    pub compressed_size: u64,
}

impl FrameIndexEntry {
    /// Exclusive end of this frame in the decompressed stream.
    ///
    /// The addition saturates at `u64::MAX`, so an in-memory entry whose
    /// `original_offset + original_size` does not fit in `u64` neither panics
    /// (builds with overflow checks) nor wraps (release builds).
    pub fn original_end(&self) -> u64 {
        self.original_offset.saturating_add(self.original_size)
    }

    /// Exclusive end of this frame inside the stored object body; saturates at
    /// `u64::MAX` for the same reason as [`FrameIndexEntry::original_end`].
    pub fn compressed_end(&self) -> u64 {
        self.compressed_offset.saturating_add(self.compressed_size)
    }
}

/// Frame table of one stored object plus the identity of the object it was
/// computed for.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FrameIndex {
    /// Size of the whole object as stored on S3 (padding frames included).
    pub total_padded_size: u64,
    /// One entry per data frame. [`FrameIndex::lookup_range`] binary-searches
    /// this list and therefore expects ascending, non-overlapping
    /// `original_offset` ranges.
    pub entries: Vec<FrameIndexEntry>,
    /// Backend-reported ETag of the source object the sidecar describes, so a
    /// reader can confirm it is still looking at the same body. `None` for
    /// legacy (v1) sidecars, which carry no such binding.
    pub source_etag: Option<String>,
    /// Stored (compressed) byte length of the source object the sidecar was
    /// computed against; a cross-check signal next to `source_etag`. `None`
    /// for legacy (v1) sidecars.
    pub source_compressed_size: Option<u64>,
}

impl FrameIndex {
    /// Total decompressed size, taken as the end of the last entry
    /// (`0` when there are no entries).
    pub fn total_original_size(&self) -> u64 {
        self.entries.last().map_or(0, FrameIndexEntry::original_end)
    }

    /// Resolves the decompressed range `[start, end_exclusive)` into the frames
    /// that must be fetched and the byte window of the stored object that
    /// contains them.
    ///
    /// A frame that overlaps the range even partially is fetched in full,
    /// because a frame is the unit of decompression. `end_exclusive` is
    /// clamped to the total decompressed size.
    ///
    /// Returns `None` when
    /// - the index is empty, the range is empty, or `start` lies at or past
    ///   the end of the decompressed stream;
    /// - no entry covers `start` or the last requested byte;
    /// - the entries are inconsistent so that the resulting byte window would
    ///   end before it starts. `decode_index` only rejects per-entry overflow,
    ///   not ordering, so a forged sidecar can reach this function with
    ///   unordered entries; such a plan must never be handed to the caller.
    pub fn lookup_range(&self, start: u64, end_exclusive: u64) -> Option<RangePlan> {
        if self.entries.is_empty() || start >= end_exclusive {
            return None;
        }
        let total = self.total_original_size();
        if start >= total {
            return None;
        }
        // `start < end_exclusive` and `start < total`, hence `clamped_end > start`
        // and the `- 1` below cannot underflow.
        let clamped_end = end_exclusive.min(total);

        let first_idx = self.frame_containing(0, start)?;
        // The last byte is never before `start`, so only frames from
        // `first_idx` onward are candidates. Restricting the search also
        // guarantees `last_idx >= first_idx` on unordered input.
        let last_idx = self.frame_containing(first_idx, clamped_end - 1)?;

        let first = &self.entries[first_idx];
        let byte_start = first.compressed_offset;
        let byte_end_exclusive = self.entries[last_idx].compressed_end();
        if byte_end_exclusive < byte_start {
            // Compressed offsets are not ascending: refuse to build an inverted window.
            return None;
        }

        Some(RangePlan {
            first_frame_idx: first_idx,
            last_frame_idx_inclusive: last_idx,
            byte_start,
            byte_end_exclusive,
            // `first` contains `start`, so `first.original_offset <= start < clamped_end`.
            slice_start_in_combined: start - first.original_offset,
            slice_end_in_combined: clamped_end - first.original_offset,
        })
    }

    /// Index of the entry at or after `from` whose decompressed range contains
    /// `pos`, found by binary search (entries are expected in ascending
    /// `original_offset` order).
    fn frame_containing(&self, from: usize, pos: u64) -> Option<usize> {
        use std::cmp::Ordering;

        self.entries
            .get(from..)?
            .binary_search_by(|e| {
                if e.original_end() <= pos {
                    Ordering::Less
                } else if e.original_offset > pos {
                    Ordering::Greater
                } else {
                    Ordering::Equal
                }
            })
            .ok()
            .map(|offset| from + offset)
    }
}

/// Result of [`FrameIndex::lookup_range`].
///
/// Fetch `byte_start..byte_end_exclusive` from S3, decompress the frames it
/// contains, and slice the concatenated output with
/// `[slice_start_in_combined, slice_end_in_combined)` to obtain the requested bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RangePlan {
    /// Index of the first frame that has to be fetched.
    pub first_frame_idx: usize,
    /// Index of the last frame that has to be fetched (inclusive).
    pub last_frame_idx_inclusive: usize,
    /// First byte of the stored object to fetch.
    pub byte_start: u64,
    /// One past the last byte of the stored object to fetch.
    pub byte_end_exclusive: u64,
    /// Start of the requested data, relative to the decompressed start of the
    /// first fetched frame.
    pub slice_start_in_combined: u64,
    /// End (exclusive) of the requested data, relative to the decompressed
    /// start of the first fetched frame.
    pub slice_end_in_combined: u64,
}
210
/// Reasons [`decode_index`] can reject a sidecar buffer.
#[derive(Debug, Error)]
pub enum IndexError {
    /// The buffer ends before a required field. The payload is the number of
    /// bytes that were available when the check failed: the whole input for
    /// the initial header check, the remaining bytes for the v2 tail and etag
    /// checks.
    #[error("index too short: {0} bytes")]
    TooShort(usize),
    /// The first four bytes are not [`INDEX_MAGIC`]; `got` holds what was found.
    #[error("bad index magic: {got:?}")]
    BadMagic { got: [u8; 4] },
    /// The version field is neither [`INDEX_VERSION_V1`] nor [`INDEX_VERSION`].
    #[error("unsupported index version {0} (this build supports {INDEX_VERSION})")]
    UnsupportedVersion(u32),
    /// The bytes left after the header are not exactly
    /// `claimed * ENTRY_BYTES` long.
    #[error("entry count {claimed} doesn't match buffer remaining {remaining}")]
    EntryCountMismatch { claimed: u64, remaining: usize },
    /// An entry's `original_offset + original_size` or
    /// `compressed_offset + compressed_size` overflows `u64`. The range
    /// planner assumes monotonically increasing offsets, so such an entry is
    /// refused at parse time.
    #[error(
        "frame index entry overflows: original_offset={ooff}, original_size={osize}, \
         compressed_offset={coff}, compressed_size={csize}"
    )]
    EntryOverflow {
        ooff: u64,
        osize: u64,
        coff: u64,
        csize: u64,
    },
    /// The header claims more than [`MAX_FRAMES`] entries. Checked before any
    /// size arithmetic or `as usize` cast on the count.
    #[error("frame index entry count {got} exceeds MAX_FRAMES={max}")]
    TooManyFrames { got: u64, max: u64 },
    /// The v2 `etag_len` field is larger than [`MAX_ETAG_BYTES`].
    #[error("sidecar etag_len {got} exceeds MAX_ETAG_BYTES={max}")]
    EtagTooLong { got: u32, max: u32 },
}
248
/// Largest entry count `decode_index` accepts; a header claiming more is
/// rejected with `IndexError::TooManyFrames`.
///
/// 16 Mi entries at 32 bytes each is a 512 MiB sidecar body, far beyond what
/// the module expects from a legitimate object (a 5 GiB object split into
/// 4 MiB chunks has 1280 frames).
pub const MAX_FRAMES: u64 = 1 << 24;

/// Largest value `decode_index` accepts in the v2 `etag_len` field; anything
/// above is rejected with `IndexError::EtagTooLong`.
///
/// The 4 KiB ceiling is deliberately generous so that non-canonical ETags
/// (for example multipart `<hex>-<n>` forms) still fit, while an arbitrary
/// attacker-chosen length does not.
pub const MAX_ETAG_BYTES: u32 = 4 * 1024;
261
262/// v0.8.4 #73 H-2: emit the **v2** layout (with `source_etag` /
263/// `source_compressed_size`). Pre-v0.8.4 deployments that PUT under v1 are
264/// still readable (decode_index dispatches on the version field) — only the
265/// writer path is bumped here.
266pub fn encode_index(idx: &FrameIndex) -> Bytes {
267 let etag_bytes = idx.source_etag.as_deref().unwrap_or("").as_bytes();
268 let mut buf = BytesMut::with_capacity(
269 HEADER_FIXED_V2 + etag_bytes.len() + idx.entries.len() * ENTRY_BYTES,
270 );
271 buf.put_slice(INDEX_MAGIC);
272 buf.put_u32_le(INDEX_VERSION);
273 buf.put_u64_le(idx.entries.len() as u64);
274 buf.put_u64_le(idx.total_original_size());
275 buf.put_u64_le(idx.total_padded_size);
276 // v2 additions
277 buf.put_u64_le(idx.source_compressed_size.unwrap_or(0));
278 buf.put_u32_le(etag_bytes.len() as u32);
279 buf.put_slice(etag_bytes);
280 for e in &idx.entries {
281 buf.put_u64_le(e.original_offset);
282 buf.put_u64_le(e.original_size);
283 buf.put_u64_le(e.compressed_offset);
284 buf.put_u64_le(e.compressed_size);
285 }
286 buf.freeze()
287}
288
289/// v0.8.4 #73 H-2: legacy v1 encoder retained for the back-compat unit test
290/// (`sidecar_header_back_compat_old_format_no_source_etag`) which has to
291/// synthesize a v1 buffer to prove decode_index still parses it. Production
292/// callers should always go through [`encode_index`] which emits v2.
293#[doc(hidden)]
294pub fn encode_index_v1_for_test(idx: &FrameIndex) -> Bytes {
295 let mut buf = BytesMut::with_capacity(HEADER_FIXED_V1 + idx.entries.len() * ENTRY_BYTES);
296 buf.put_slice(INDEX_MAGIC);
297 buf.put_u32_le(INDEX_VERSION_V1);
298 buf.put_u64_le(idx.entries.len() as u64);
299 buf.put_u64_le(idx.total_original_size());
300 buf.put_u64_le(idx.total_padded_size);
301 for e in &idx.entries {
302 buf.put_u64_le(e.original_offset);
303 buf.put_u64_le(e.original_size);
304 buf.put_u64_le(e.compressed_offset);
305 buf.put_u64_le(e.compressed_size);
306 }
307 buf.freeze()
308}
309
310pub fn decode_index(mut input: Bytes) -> Result<FrameIndex, IndexError> {
311 if input.len() < HEADER_FIXED_V1 {
312 return Err(IndexError::TooShort(input.len()));
313 }
314 let mut magic = [0u8; 4];
315 magic.copy_from_slice(&input[..4]);
316 if &magic != INDEX_MAGIC {
317 return Err(IndexError::BadMagic { got: magic });
318 }
319 input.advance(4);
320 let version = input.get_u32_le();
321 let n = input.get_u64_le();
322 let _total_original = input.get_u64_le();
323 let total_padded_size = input.get_u64_le();
324 // v0.8.15 H-c: hard cap on `n` *before* any size arithmetic. The
325 // existing `expected_remaining == input.len()` check is a
326 // necessary condition but not sufficient — on a 32-bit target,
327 // `n as usize` truncates a 33-bit value and the buffer check
328 // would silently pass with the wrong loop count. Reject early.
329 if n > MAX_FRAMES {
330 return Err(IndexError::TooManyFrames {
331 got: n,
332 max: MAX_FRAMES,
333 });
334 }
335 // Dispatch on version. v1 jumps straight to the entry table; v2 reads
336 // the additional fixed fields + variable-length etag before the entries.
337 let (source_compressed_size, source_etag) = match version {
338 v if v == INDEX_VERSION_V1 => (None, None),
339 v if v == INDEX_VERSION => {
340 // v2 fixed-header tail: source_compressed_size (u64) + etag_len (u32).
341 if input.len() < 8 + 4 {
342 return Err(IndexError::TooShort(input.len()));
343 }
344 let scs = input.get_u64_le();
345 let etag_len_u32 = input.get_u32_le();
346 // v0.8.15 H-c: bound `etag_len` *before* the `as usize`
347 // cast so the buffer check on a 32-bit WASM target can't
348 // be tricked into a usize-truncated value.
349 if etag_len_u32 > MAX_ETAG_BYTES {
350 return Err(IndexError::EtagTooLong {
351 got: etag_len_u32,
352 max: MAX_ETAG_BYTES,
353 });
354 }
355 let etag_len = etag_len_u32 as usize;
356 if input.len() < etag_len {
357 return Err(IndexError::TooShort(input.len()));
358 }
359 // Slice off the etag bytes; treat decode failure as "no etag" so
360 // a corrupted etag field still leaves a usable index (the GET
361 // path will fall back to full read on the missing binding).
362 let etag_bytes = input.split_to(etag_len);
363 let etag = if etag_len == 0 {
364 None
365 } else {
366 std::str::from_utf8(&etag_bytes).ok().map(str::to_owned)
367 };
368 (if scs == 0 { None } else { Some(scs) }, etag)
369 }
370 other => return Err(IndexError::UnsupportedVersion(other)),
371 };
372 // v0.8.15 H-c: `n * ENTRY_BYTES` cannot overflow `usize` here
373 // because `n <= MAX_FRAMES = 16M` and `ENTRY_BYTES = 32`, and on
374 // 32-bit targets the resulting value fits in `usize` (≤ 512
375 // MiB). The `as usize` cast on `n` is now bounded by the same
376 // ceiling.
377 let expected_remaining = (n as usize).saturating_mul(ENTRY_BYTES);
378 if input.len() != expected_remaining {
379 return Err(IndexError::EntryCountMismatch {
380 claimed: n,
381 remaining: input.len(),
382 });
383 }
384 // v0.8.12 HIGH-14 fix: clamp the initial allocation the way the
385 // CpuZstd / CpuGzip decompress path does (see
386 // `DECOMPRESS_BOOTSTRAP_CAPACITY` in `lib.rs`, landed in #89).
387 // A forged sidecar with `n = 100_000_000` paired with a 3.2 GiB
388 // body (the only way the `expected_remaining` check above passes
389 // for that `n`) would otherwise commit ~3.2 GiB of `FrameIndexEntry`
390 // slots up front, on top of the 3.2 GiB body bytes already in
391 // RAM. The honest cap is 4096 entries (128 KiB at
392 // `ENTRY_BYTES = 32`) — large enough that single-PUT framed and
393 // typical multipart objects don't pay any growth cost, small
394 // enough that an adversarial sidecar can't drive multi-GiB
395 // pre-allocations behind the bounded `expected_remaining`
396 // check. The `push` loop below grows the vector naturally and
397 // is itself bounded by `expected_remaining == input.len()`.
398 const BOOTSTRAP_ENTRIES: usize = 4096;
399 let initial_cap = (n as usize).min(BOOTSTRAP_ENTRIES);
400 let mut entries = Vec::with_capacity(initial_cap);
401 for _ in 0..n {
402 let original_offset = input.get_u64_le();
403 let original_size = input.get_u64_le();
404 let compressed_offset = input.get_u64_le();
405 let compressed_size = input.get_u64_le();
406 // v0.8.15 H-a: refuse entries whose `offset + size` overflows
407 // `u64`. The downstream `binary_search_by` / `lookup_range`
408 // machinery relies on monotone offsets — a wrapped value
409 // would let a forged sidecar drive `RangePlan.byte_end_exclusive`
410 // to garbage.
411 if original_offset.checked_add(original_size).is_none()
412 || compressed_offset.checked_add(compressed_size).is_none()
413 {
414 return Err(IndexError::EntryOverflow {
415 ooff: original_offset,
416 osize: original_size,
417 coff: compressed_offset,
418 csize: compressed_size,
419 });
420 }
421 entries.push(FrameIndexEntry {
422 original_offset,
423 original_size,
424 compressed_offset,
425 compressed_size,
426 });
427 }
428 Ok(FrameIndex {
429 total_padded_size,
430 entries,
431 source_etag,
432 source_compressed_size,
433 })
434}
435
436/// Object body の bytes 全体を scan して FrameIndex を構築する。
437/// `multipart_e2e.rs` 等で full-scan path として使用。
438pub fn build_index_from_body(body: &Bytes) -> Result<FrameIndex, crate::multipart::FrameError> {
439 let mut entries = Vec::new();
440 let mut original_off: u64 = 0;
441 // FrameIter は padding を skip してしまうので、自前で位置追跡しながら parse する
442 let mut cursor = 0usize;
443 let mut iter_buf = body.clone();
444 while cursor < body.len() {
445 // padding magic を skip
446 if cursor + 4 <= body.len() && &body[cursor..cursor + 4] == crate::multipart::PADDING_MAGIC
447 {
448 // PADDING_HEADER_BYTES = 4 magic + 8 length
449 if cursor + crate::multipart::PADDING_HEADER_BYTES > body.len() {
450 break;
451 }
452 let pad_len = u64::from_le_bytes(body[cursor + 4..cursor + 12].try_into().unwrap());
453 cursor += crate::multipart::PADDING_HEADER_BYTES + pad_len as usize;
454 iter_buf = body.slice(cursor..);
455 continue;
456 }
457 // data frame
458 if cursor + crate::multipart::FRAME_HEADER_BYTES > body.len() {
459 break;
460 }
461 let (header, _payload, rest) = crate::multipart::read_frame(iter_buf.clone())?;
462 let frame_total = crate::multipart::FRAME_HEADER_BYTES + header.compressed_size as usize;
463 entries.push(FrameIndexEntry {
464 original_offset: original_off,
465 original_size: header.original_size,
466 compressed_offset: cursor as u64,
467 compressed_size: frame_total as u64,
468 });
469 original_off += header.original_size;
470 cursor += frame_total;
471 iter_buf = rest;
472 }
473 Ok(FrameIndex {
474 total_padded_size: body.len() as u64,
475 entries,
476 // The caller (s4-server `put_object`) stamps the version-binding
477 // fields after the backend PUT returns the authoritative ETag —
478 // build_index_from_body itself only sees the post-compress bytes
479 // and cannot fabricate a server-blessed ETag.
480 source_etag: None,
481 source_compressed_size: None,
482 })
483}
484
/// Suffix that turns an object key into the key of its sidecar index object.
///
/// Public so that the reserved-name guard and the list filters elsewhere in
/// the gateway can share this one definition instead of repeating the literal.
pub const SIDECAR_SUFFIX: &str = ".s4index";

/// Returns the sidecar key for `object_key`: the key itself followed by
/// [`SIDECAR_SUFFIX`].
pub fn sidecar_key(object_key: &str) -> String {
    let mut key = String::with_capacity(object_key.len() + SIDECAR_SUFFIX.len());
    key.push_str(object_key);
    key.push_str(SIDECAR_SUFFIX);
    key
}

/// Reports whether `object_key` ends with [`SIDECAR_SUFFIX`] (case-sensitive),
/// i.e. whether it collides with the names the gateway reserves for its own
/// sidecar objects.
///
/// A user object stored under such a name would either be hidden by the list
/// filter or removed by sidecar cleanup, so callers are expected to refuse it
/// up front.
pub fn is_reserved_sidecar_key(object_key: &str) -> bool {
    object_key.strip_suffix(SIDECAR_SUFFIX).is_some()
}
506
#[cfg(test)]
mod tests {
    use super::*;
    use crate::CodecKind;
    use crate::multipart::{FrameHeader, pad_to_minimum, write_frame};

    /// Three-frame fixture: 230 decompressed bytes in total, with a 10-byte
    /// padding gap between frame 0 and frame 1 in the stored layout.
    fn sample_index() -> FrameIndex {
        FrameIndex {
            total_padded_size: 200,
            entries: vec![
                FrameIndexEntry {
                    original_offset: 0,
                    original_size: 100,
                    compressed_offset: 0,
                    compressed_size: 50,
                },
                FrameIndexEntry {
                    original_offset: 100,
                    original_size: 80,
                    compressed_offset: 60, // gap of 10 = padding
                    compressed_size: 40,
                },
                FrameIndexEntry {
                    original_offset: 180,
                    original_size: 50,
                    compressed_offset: 100,
                    compressed_size: 30,
                },
            ],
            // The version-binding fields stay unset so this fixture keeps
            // driving the lookup_range / encode_decode / build_from_body
            // paths that do not care about them.
            source_etag: None,
            source_compressed_size: None,
        }
    }

    /// An index without binding fields survives encode → decode unchanged.
    #[test]
    fn encode_decode_roundtrip() {
        let idx = sample_index();
        let bytes = encode_index(&idx);
        let decoded = decode_index(bytes).unwrap();
        assert_eq!(decoded, idx);
    }

    /// v2 round-trip with the `source_etag` / `source_compressed_size`
    /// fields populated.
    #[test]
    fn encode_decode_roundtrip_v2_with_source_binding() {
        let mut idx = sample_index();
        idx.source_etag = Some("\"deadbeefcafe\"".into());
        idx.source_compressed_size = Some(987_654);
        let bytes = encode_index(&idx);
        // First 4 bytes magic + next 4 bytes LE = INDEX_VERSION (2).
        assert_eq!(&bytes[..4], INDEX_MAGIC);
        let version = u32::from_le_bytes(bytes[4..8].try_into().unwrap());
        assert_eq!(version, INDEX_VERSION, "writer must always emit v2");
        let decoded = decode_index(bytes).unwrap();
        assert_eq!(decoded, idx);
    }

    /// A sidecar in the raw v1 layout must still decode under the v2 reader,
    /// with `source_etag = None` / `source_compressed_size = None`. This
    /// locks in the `decode_index` dispatch on the `version` field that makes
    /// the back-compat path real.
    #[test]
    fn sidecar_header_back_compat_old_format_no_source_etag() {
        let v2_idx = {
            let mut idx = sample_index();
            idx.source_etag = Some("\"unused\"".into());
            idx.source_compressed_size = Some(42);
            idx
        };
        // Round-trip through the v1 encoder, i.e. simulate decoding a sidecar
        // written by an older server. The version-binding fields are dropped
        // on the way through (v1 has no slot for them) and must come back as
        // `None`.
        let v1_bytes = encode_index_v1_for_test(&v2_idx);
        // Sanity: the on-wire version field is v1.
        let version = u32::from_le_bytes(v1_bytes[4..8].try_into().unwrap());
        assert_eq!(version, INDEX_VERSION_V1);
        let decoded = decode_index(v1_bytes).expect("v1 sidecar must still decode");
        // Frame entries + total_padded_size survive, while the v2-only fields
        // surface as None.
        assert_eq!(decoded.entries, v2_idx.entries);
        assert_eq!(decoded.total_padded_size, v2_idx.total_padded_size);
        assert_eq!(decoded.source_etag, None);
        assert_eq!(decoded.source_compressed_size, None);
    }

    #[test]
    fn lookup_range_within_single_frame() {
        let idx = sample_index();
        // Original bytes [10, 50) lie inside frame 0 (original 0..100).
        let plan = idx.lookup_range(10, 50).unwrap();
        assert_eq!(plan.first_frame_idx, 0);
        assert_eq!(plan.last_frame_idx_inclusive, 0);
        assert_eq!(plan.byte_start, 0);
        assert_eq!(plan.byte_end_exclusive, 50); // all of frame 0
        assert_eq!(plan.slice_start_in_combined, 10);
        assert_eq!(plan.slice_end_in_combined, 50);
    }

    #[test]
    fn lookup_range_spans_frames() {
        let idx = sample_index();
        // [50, 150) covers the second half of frame 0 plus the first half of frame 1.
        let plan = idx.lookup_range(50, 150).unwrap();
        assert_eq!(plan.first_frame_idx, 0);
        assert_eq!(plan.last_frame_idx_inclusive, 1);
        assert_eq!(plan.byte_start, 0);
        assert_eq!(plan.byte_end_exclusive, 100); // frame 0 (0..50) + frame 1 (60..100)
        assert_eq!(plan.slice_start_in_combined, 50);
        assert_eq!(plan.slice_end_in_combined, 150);
    }

    #[test]
    fn lookup_range_at_end_clamps() {
        let idx = sample_index();
        // Total original = 100 + 80 + 50 = 230; a request for 200..1000 is clamped to 200..230.
        let plan = idx.lookup_range(200, 1000).unwrap();
        assert_eq!(plan.first_frame_idx, 2);
        assert_eq!(plan.last_frame_idx_inclusive, 2);
        // All of frame 2 (compressed_offset=100, size=30 → bytes 100..130).
        assert_eq!(plan.byte_start, 100);
        assert_eq!(plan.byte_end_exclusive, 130);
    }

    #[test]
    fn lookup_range_out_of_bounds_returns_none() {
        let idx = sample_index();
        assert!(idx.lookup_range(500, 600).is_none());
    }

    #[test]
    fn build_index_from_real_body_skips_padding() {
        // Assemble 2 frames with padding in between and check that the index
        // is built correctly.
        let mut buf = BytesMut::new();
        let p1 = Bytes::from_static(b"AAAA");
        write_frame(
            &mut buf,
            FrameHeader {
                codec: CodecKind::Passthrough,
                original_size: 100,
                compressed_size: p1.len() as u64,
                crc32c: 0,
            },
            &p1,
        );
        let frame1_end = buf.len();
        // pad to 5000 bytes
        pad_to_minimum(&mut buf, 5000);
        let pad_end = buf.len();
        let p2 = Bytes::from_static(b"BBBB");
        write_frame(
            &mut buf,
            FrameHeader {
                codec: CodecKind::Passthrough,
                original_size: 80,
                compressed_size: p2.len() as u64,
                crc32c: 0,
            },
            &p2,
        );

        let idx = build_index_from_body(&buf.freeze()).unwrap();
        assert_eq!(idx.entries.len(), 2);
        assert_eq!(idx.entries[0].original_offset, 0);
        assert_eq!(idx.entries[0].compressed_offset, 0);
        assert_eq!(idx.entries[0].original_size, 100);
        assert_eq!(idx.entries[0].compressed_size, frame1_end as u64);
        assert_eq!(idx.entries[1].original_offset, 100);
        assert_eq!(idx.entries[1].compressed_offset, pad_end as u64);
        assert_eq!(idx.entries[1].original_size, 80);
        assert_eq!(idx.total_original_size(), 180);
    }
}