// s4_codec/dispatcher.rs

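//! Dispatchers that decide which codec compresses an object at PUT time.
//!
//! Phase 1 provides `AlwaysDispatcher`, which always selects the same codec.
//! The second half of Phase 1 adds `SamplingDispatcher`, which samples the
//! head of the input to tell integer-heavy, text-heavy and already-compressed
//! payloads apart and switches codec accordingly.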
6
use crate::CodecKind;
8
9/// PUT body の先頭 sample から codec を選ぶ trait。
10///
11/// v0.8 #56: 呼び出し側が `Content-Length` を知っている場合 (chunked transfer
12/// でない通常 PUT)、`pick_with_size_hint` 経由で total body size を渡せる。
13/// `SamplingDispatcher` は GPU upload overhead が compress 時間を上回る小オブ
14/// ジェクトで CPU codec を選び、十分大きい (>= `gpu_min_bytes`) ものでだけ
15/// GPU codec へ昇格させる。size hint が `None` (chunked transfer) の場合は
16/// 保守的に CPU 側に倒す。
17///
18/// 既定実装は `pick_with_size_hint(sample, None)` を `pick(sample)` に委譲する
19/// — 既存 implementor は `pick` だけ実装すれば従来通り動く。
20#[async_trait::async_trait]
21pub trait CodecDispatcher: Send + Sync {
22    async fn pick(&self, sample: &[u8]) -> CodecKind;
23
24    /// v0.8 #56: size-hint aware pick. 既定実装は `pick(sample)` に委譲する
25    /// ので、追加情報を活用する dispatcher (`SamplingDispatcher`) のみ override
26    /// すればよい。`total_size = None` は「chunked transfer で content-length
27    /// が無い」ケースを表す。
28    async fn pick_with_size_hint(&self, sample: &[u8], _total_size: Option<u64>) -> CodecKind {
29        self.pick(sample).await
30    }
31}
32
33/// 常に同じ kind を返す dispatcher (固定 codec 運用)。
34#[derive(Debug, Clone, Copy)]
35pub struct AlwaysDispatcher(pub CodecKind);
36
37#[async_trait::async_trait]
38impl CodecDispatcher for AlwaysDispatcher {
39    async fn pick(&self, _sample: &[u8]) -> CodecKind {
40        self.0
41    }
42}
43
44/// 入力 sample を見て codec を選ぶ dispatcher。
45///
46/// 判定順 (上位優先):
47/// 1. 短すぎる入力 (<128 byte) → `default`
48/// 2. magic bytes が既圧縮フォーマット (gzip / zstd / png / jpeg / mp4 / zip / pdf
49///    / 7z / xz / bzip2) → `Passthrough` (再圧縮しても意味がない)
50/// 3. Shannon entropy が `entropy_threshold` (default 7.5 bits/byte) 以上 → `Passthrough`
51///    (高エントロピー = ほぼランダム = 圧縮余地なし)
52/// 4. それ以外 → `default` (text / log / parquet 数値列等、圧縮余地あり)
53///
54/// Phase 1 では `default = CpuZstd` 想定。Phase 1 後半で integer-column 検出を加え、
55/// `default` 分岐を「数値列なら NvcompBitcomp、そうでなければ CpuZstd」に拡張する。
56///
57/// ## v0.8 #56: GPU auto-detect at boot
58///
59/// `with_gpu_preference(true, gpu_min_bytes)` を呼ぶと、boot 時に
60/// `s4_codec::nvcomp::is_gpu_available()` が true を返した場合に限り、
61/// 「default が `CpuZstd` でかつ total size >= `gpu_min_bytes` の object」を
62/// `NvcompZstd` に昇格させる。size hint が `None` (chunked transfer)、
63/// または閾値未満の小オブジェクトでは GPU upload overhead を避けるため
64/// CPU codec のままにする。
65///
66/// `nvcomp-gpu` feature が build-time で off の場合、`NvcompZstd` への昇格は
67/// 行わない (registry に居ない codec を指すと dispatch 時に
68/// `UnregisteredCodec` で fail するため)。orchestrator は main.rs 側で
69/// `prefer_gpu = false` を強制することでこれを担保する。
70#[derive(Debug, Clone)]
71pub struct SamplingDispatcher {
72    pub default: CodecKind,
73    pub entropy_threshold: f64,
74    /// v0.8 #56: when set, route large `CpuZstd` picks through `NvcompZstd`.
75    pub prefer_gpu: bool,
76    /// v0.8 #56: GPU promotion only fires when the caller can prove
77    /// `total_size >= gpu_min_bytes` via `pick_with_size_hint`. Below this
78    /// threshold the GPU upload overhead exceeds the compress time so CPU
79    /// wins; the default 1 MiB is the empirical break-even point on common
80    /// text / log payloads with PCIe 4.0 + an A10G-class GPU.
81    pub gpu_min_bytes: usize,
82    /// v0.8.12 #125: when set, sample-based columnar-integer detection
83    /// promotes a `CpuZstd` pick to `NvcompBitcomp` instead of
84    /// `NvcompZstd` for Parquet / postings / time-series payloads.
85    /// Requires the same `prefer_gpu = true` and
86    /// `total_size >= gpu_min_bytes` preconditions — the columnar
87    /// promotion adds *targeting* on top of the GPU-promotion gate,
88    /// it doesn't loosen it. When `false` (default), large CpuZstd
89    /// picks always go to NvcompZstd, matching v0.8.11 behaviour.
90    pub prefer_columnar_gpu: bool,
91}
92
93impl SamplingDispatcher {
94    pub const DEFAULT_ENTROPY_THRESHOLD: f64 = 7.5;
95    pub const MIN_SAMPLE_BYTES: usize = 128;
96    /// v0.8 #56: 1 MiB. The empirical break-even point — below this, the
97    /// PCIe upload + kernel launch overhead dominates the GPU's compress
98    /// throughput advantage.
99    pub const DEFAULT_GPU_MIN_BYTES: usize = 1_048_576;
100
101    pub fn new(default: CodecKind) -> Self {
102        Self {
103            default,
104            entropy_threshold: Self::DEFAULT_ENTROPY_THRESHOLD,
105            prefer_gpu: false,
106            gpu_min_bytes: Self::DEFAULT_GPU_MIN_BYTES,
107            prefer_columnar_gpu: false,
108        }
109    }
110
111    /// v0.8.12 #125: enable Bitcomp routing for columnar-integer
112    /// payloads. Composes with `with_gpu_preference` — both must be
113    /// on for any promotion to fire, and the columnar branch picks
114    /// `NvcompBitcomp` instead of `NvcompZstd` when the sample
115    /// matches the per-position-entropy signature of a u32 / u64 LE
116    /// integer column (Parquet, postings, time-series). When this
117    /// flag is off (default) the README's "integer/columnar →
118    /// Bitcomp" pitch is honoured manually via `--codec
119    /// nvcomp-bitcomp`; turning it on makes the SamplingDispatcher
120    /// pick Bitcomp automatically.
121    #[must_use]
122    pub fn with_columnar_gpu_preference(mut self, on: bool) -> Self {
123        self.prefer_columnar_gpu = on;
124        self
125    }
126
127    #[must_use]
128    pub fn with_entropy_threshold(mut self, t: f64) -> Self {
129        self.entropy_threshold = t;
130        self
131    }
132
133    /// v0.8 #56: enable GPU promotion. When `prefer_gpu = true`, a `CpuZstd`
134    /// pick on a body whose `total_size >= gpu_min_bytes` is rewritten to
135    /// `NvcompZstd`. Pass `prefer_gpu = false` (the default) to disable.
136    /// The threshold is in bytes; `1_048_576` (1 MiB) is the recommended
137    /// default for PCIe 4.0 hosts.
138    #[must_use]
139    pub fn with_gpu_preference(mut self, prefer_gpu: bool, gpu_min_bytes: usize) -> Self {
140        self.prefer_gpu = prefer_gpu;
141        self.gpu_min_bytes = gpu_min_bytes;
142        self
143    }
144}
145
/// Estimates the Shannon entropy of `sample` in bits per byte.
///
/// The result lies in `0.0..=8.0`: `0.0` for an empty sample or one made of a
/// single repeated byte value, `8.0` when all 256 byte values occur equally
/// often.
fn shannon_entropy(sample: &[u8]) -> f64 {
    if sample.is_empty() {
        return 0.0;
    }
    // Histogram of byte values.
    let mut histogram = [0u32; 256];
    for &byte in sample {
        histogram[usize::from(byte)] += 1;
    }
    let total = sample.len() as f64;
    // H = -Σ p·log2(p) over the byte values that actually occur. Empty buckets
    // are skipped because log2(0) is -inf (their contribution is 0 in the limit).
    histogram
        .iter()
        .filter(|&&count| count != 0)
        .map(|&count| f64::from(count) / total)
        .fold(0.0, |entropy, p| entropy - p * p.log2())
}
166
/// v0.8.12 #125: minimum sample size at which the columnar-integer
/// signature is statistically meaningful. Below this we'd be reading
/// noise into the per-stride-position byte histogram. 512 bytes =
/// 128 samples per position at stride 4 (u32), 64 at stride 8 (u64).
const COLUMNAR_MIN_SAMPLE: usize = 512;
/// v0.8.12 #125: per-stride-position entropy gap that flags a sample
/// as columnar-integer. Random data has near-uniform per-position
/// entropy (gap ≈ 0); a u32 LE column of bounded values
/// (`value < 2^24`) has full entropy on the low byte and ~0 entropy
/// on the high byte (gap > 6). 4.0 bits is a conservative middle
/// ground that catches u32 / u64 monotonic-id and timestamp columns
/// without false-positives on text or mixed binary records.
const COLUMNAR_ENTROPY_GAP: f64 = 4.0;

/// v0.8.12 #125: Shannon entropy (bits per byte) of the bytes found at
/// offsets `pos`, `pos + stride`, `pos + 2 * stride`, … of `sample`.
///
/// Uses a 256-bucket `[u32; 256]` byte histogram. Returns `0.0` when no
/// byte of `sample` falls on the requested position.
///
/// # Panics
///
/// Panics if `stride == 0`. The guard is a hard `assert!` so that a bad
/// stride can never turn into an endless loop in release builds.
/// `pos < stride` is checked in debug builds only.
fn entropy_at_stride_position(sample: &[u8], stride: usize, pos: usize) -> f64 {
    assert!(stride > 0, "stride must be non-zero");
    debug_assert!(pos < stride);
    let mut counts = [0u32; 256];
    let mut n = 0u32;
    for &byte in sample.iter().skip(pos).step_by(stride) {
        counts[usize::from(byte)] += 1;
        n += 1;
    }
    if n == 0 {
        return 0.0;
    }
    let total = f64::from(n);
    // H = -Σ p·log2(p) over the byte values seen at this position.
    counts
        .iter()
        .filter(|&&c| c != 0)
        .map(|&c| f64::from(c) / total)
        .fold(0.0, |e, p| e - p * p.log2())
}

/// v0.8.12 #125: detect a u32 / u64 little-endian integer column in
/// the sample. Returns `true` when, for stride 4 or stride 8, the gap
/// between the highest and lowest per-position entropy reaches
/// [`COLUMNAR_ENTROPY_GAP`] — the signature of a column whose high bytes
/// are mostly zero (bounded ints) while the low bytes vary freely
/// (counts / timestamps / sorted ids). Samples shorter than
/// [`COLUMNAR_MIN_SAMPLE`] are never flagged.
///
/// Conservative by design: per the original author's note it was tested
/// against Parquet u32 / u64 columns (`apache-parquet/test/data/`),
/// pseudo-random bytes, English text, and DNA reads — only the integer
/// columns trip the gap.
fn looks_columnar_integer(sample: &[u8]) -> bool {
    if sample.len() < COLUMNAR_MIN_SAMPLE {
        return false;
    }
    [4usize, 8].iter().any(|&stride| {
        // Need ≥ 64 strides for the per-position histogram to be stable;
        // below that, even random data shows large gaps. With the current
        // COLUMNAR_MIN_SAMPLE this is already implied, but the guard keeps
        // the invariant if that constant is ever lowered.
        if sample.len() < stride * 64 {
            return false;
        }
        let (lowest, highest) = (0..stride)
            .map(|pos| entropy_at_stride_position(sample, stride, pos))
            .fold((f64::INFINITY, f64::NEG_INFINITY), |(lo, hi), e| {
                (lo.min(e), hi.max(e))
            });
        highest - lowest >= COLUMNAR_ENTROPY_GAP
    })
}
244
245/// v0.8.15 M-7 / v0.8.16 F-12: confirm that the bytes *after* the
246/// magic-byte prefix look like compressed data (high entropy), not
247/// benign text whose leading 2-3 bytes happen to spell the magic.
248/// Returns `true` when the post-magic window has entropy `>= threshold`
249/// (default 7.5). Operates on `sample[16..]` ── 16 bytes of skip is
250/// enough to clear every magic this dispatcher knows about while
251/// leaving plenty of runway for the entropy estimate to be statistically
252/// meaningful.
253///
254/// v0.8.16 F-12 fix: small samples now default to `false` (= "don't
255/// trust the magic byte alone on short samples"). The v0.8.15 M-7
256/// motivation was a 40-byte `BZh:loglog:` user log file — but the
257/// pre-F-12 `<= SKIP+32` short-circuit returned `true`, so
258/// passthrough still fired on exactly the case M-7 was meant to
259/// catch. Real bzip2 / gzip / zstd objects are essentially never <
260/// 48 bytes; rejecting the magic on a short sample is the safer
261/// default. Operators who really want passthrough on tiny inputs
262/// can run `--codec passthrough` explicitly.
263fn post_magic_entropy_high(sample: &[u8], threshold: f64) -> bool {
264    const SKIP: usize = 16;
265    if sample.len() <= SKIP + 32 {
266        return false;
267    }
268    shannon_entropy(&sample[SKIP..]) >= threshold
269}
270
/// Detects the magic bytes of already-compressed (or compressed-container)
/// formats at the start of `sample`. Returns `true` on a match.
///
/// Only the signature is checked here; it says nothing about the bytes that
/// follow, which is why `pick_from_sample` pairs it with a post-magic entropy
/// check.
fn looks_already_compressed(sample: &[u8]) -> bool {
    // Signatures that sit at offset 0.
    const PREFIX_MAGICS: &[&[u8]] = &[
        &[0x1f, 0x8b],                                     // gzip
        &[0x28, 0xb5, 0x2f, 0xfd],                         // zstd
        &[0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a], // PNG
        &[0xff, 0xd8, 0xff],                               // JPEG (FF D8 FF)
        b"%PDF-",                                          // PDF
        &[0x50, 0x4b, 0x03, 0x04],                         // ZIP / docx / jar / apk
        &[0x37, 0x7a, 0xbc, 0xaf, 0x27, 0x1c],             // 7z
        &[0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00],             // xz
        b"BZh",                                            // bzip2
        &[0x1a, 0x45, 0xdf, 0xa3],                         // webm / mkv (EBML)
    ];
    if PREFIX_MAGICS.iter().any(|magic| sample.starts_with(magic)) {
        return true;
    }
    // mp4 / m4a / mov (ISO Base Media): bytes 4..8 == "ftyp".
    if sample.get(4..8) == Some(&b"ftyp"[..]) {
        return true;
    }
    // webp: "RIFF" <4 size bytes> "WEBP".
    sample.starts_with(b"RIFF") && sample.get(8..12) == Some(&b"WEBP"[..])
}
323
324impl SamplingDispatcher {
325    /// Core sample-only decision shared by `pick` and `pick_with_size_hint`.
326    /// Returns the pre-GPU-promotion choice; the size-hint-aware caller may
327    /// rewrite a `CpuZstd` result to `NvcompZstd` when the body is big enough.
328    ///
329    /// # Adversarial limitations (v0.8.15 M-6 / M-7)
330    ///
331    /// The sample is just the prefix the listener captured (typically
332    /// the first 4 KiB). An attacker who controls the upload bytes
333    /// can:
334    ///
335    /// - **Trick passthrough into firing** by prefixing a gzip / zstd
336    ///   magic and following it with 10 GiB of zeros, costing the
337    ///   gateway disk space the operator expected to save. Mitigated
338    ///   by requiring the post-magic window to *also* show high
339    ///   entropy — real compressed bytes have both, an unscrupulous
340    ///   text payload won't.
341    /// - **Trick passthrough into NOT firing** by prefixing 4 KiB of
342    ///   zeros to an already-compressed body, costing CPU on a
343    ///   useless compress pass. The dispatcher cannot defend against
344    ///   this without re-sampling other windows (a v0.8.15 follow-up;
345    ///   would require listener-side changes to capture multiple
346    ///   windows, not just the prefix).
347    ///
348    /// The sample-only path is "best-effort", not "adversarial".
349    /// Operators who need an adversarial guarantee should set
350    /// `--dispatcher always --codec cpu-zstd` (compress everything)
351    /// or `--codec passthrough` (compress nothing) and bypass the
352    /// sampler entirely.
353    fn pick_from_sample(&self, sample: &[u8]) -> CodecKind {
354        // v0.8.17 G-3: run the magic-byte + post-magic-entropy
355        // check FIRST, regardless of `MIN_SAMPLE_BYTES`. The
356        // v0.8.16 F-12 guard inside `post_magic_entropy_high`
357        // was never reachable because the upstream
358        // `< MIN_SAMPLE_BYTES (=128)` short-circuit subsumed the
359        // `<= 48` short-sample case the comment cited. Promote
360        // the magic-byte arm above the short-circuit and let
361        // `post_magic_entropy_high` decide for itself how to
362        // handle short samples — that's the only place where the
363        // F-12 `false` default actually matters and where the
364        // `BZh:loglog:` motivation gets caught.
365        if looks_already_compressed(sample)
366            && post_magic_entropy_high(sample, self.entropy_threshold)
367        {
368            return CodecKind::Passthrough;
369        }
370        if sample.len() < Self::MIN_SAMPLE_BYTES {
371            return self.default;
372        }
373        if shannon_entropy(sample) >= self.entropy_threshold {
374            return CodecKind::Passthrough;
375        }
376        self.default
377    }
378
379    /// v0.8 #56 / v0.8.12 #125: rewrite a `CpuZstd` pick to a GPU
380    /// codec when GPU preference is on AND the caller proved a total
381    /// body size >= `gpu_min_bytes`. v0.8.12 adds the columnar-integer
382    /// branch: when `prefer_columnar_gpu = true` AND the sample
383    /// matches the per-stride-position entropy signature of a
384    /// u32 / u64 LE integer column, route to `NvcompBitcomp` instead
385    /// of `NvcompZstd`. Passthrough / non-CpuZstd picks are left
386    /// alone — already-compressed bodies don't benefit from GPU
387    /// compression, and other CPU codecs (CpuGzip) imply the
388    /// operator wants wire-compatible output that the nvCOMP codecs
389    /// can't provide.
390    fn maybe_promote_to_gpu(
391        &self,
392        chosen: CodecKind,
393        sample: &[u8],
394        total_size: Option<u64>,
395    ) -> CodecKind {
396        if !self.prefer_gpu {
397            return chosen;
398        }
399        if chosen != CodecKind::CpuZstd {
400            return chosen;
401        }
402        let big_enough = match total_size {
403            Some(n) => n >= self.gpu_min_bytes as u64,
404            // No size hint (chunked transfer) → conservative, keep CpuZstd.
405            None => return chosen,
406        };
407        if !big_enough {
408            return chosen;
409        }
410        if self.prefer_columnar_gpu && looks_columnar_integer(sample) {
411            CodecKind::NvcompBitcomp
412        } else {
413            CodecKind::NvcompZstd
414        }
415    }
416}
417
418#[async_trait::async_trait]
419impl CodecDispatcher for SamplingDispatcher {
420    async fn pick(&self, sample: &[u8]) -> CodecKind {
421        // No size hint available → never promote to GPU.
422        self.pick_from_sample(sample)
423    }
424
425    async fn pick_with_size_hint(&self, sample: &[u8], total_size: Option<u64>) -> CodecKind {
426        let chosen = self.pick_from_sample(sample);
427        self.maybe_promote_to_gpu(chosen, sample, total_size)
428    }
429}
430
431/// `Box<dyn CodecDispatcher>` からも `CodecDispatcher` として使えるようにする blanket impl
432#[async_trait::async_trait]
433impl<T: CodecDispatcher + ?Sized> CodecDispatcher for Box<T> {
434    async fn pick(&self, sample: &[u8]) -> CodecKind {
435        (**self).pick(sample).await
436    }
437
438    async fn pick_with_size_hint(&self, sample: &[u8], total_size: Option<u64>) -> CodecKind {
439        (**self).pick_with_size_hint(sample, total_size).await
440    }
441}
442
443#[async_trait::async_trait]
444impl<T: CodecDispatcher + ?Sized> CodecDispatcher for std::sync::Arc<T> {
445    async fn pick(&self, sample: &[u8]) -> CodecKind {
446        (**self).pick(sample).await
447    }
448
449    async fn pick_with_size_hint(&self, sample: &[u8], total_size: Option<u64>) -> CodecKind {
450        (**self).pick_with_size_hint(sample, total_size).await
451    }
452}
453
#[cfg(test)]
mod tests {
    use super::*;

    // ===========================================================
    // Shared fixtures
    // ===========================================================

    /// 45-byte English sentence used to build low-entropy text fixtures.
    const SENTENCE: &str = "the quick brown fox jumps over the lazy dog. ";

    /// `repeats` copies of [`SENTENCE`] as bytes (low entropy).
    fn english_text(repeats: usize) -> Vec<u8> {
        SENTENCE.repeat(repeats).into_bytes()
    }

    /// 1350 bytes of low-entropy text (30 repeats of the sentence). The
    /// magic-byte and entropy checks fall through to `default`, which the
    /// v0.8 #56 promotion logic then either keeps as `CpuZstd` or rewrites
    /// to `NvcompZstd`.
    fn text_sample() -> Vec<u8> {
        english_text(30)
    }

    /// `len` deterministic pseudo-random bytes: a 64-bit xorshift generator
    /// (shifts 13 / 7 / 17) seeded with `seed`, emitting the low byte of each
    /// successive state.
    fn xorshift_bytes(seed: u64, len: usize) -> Vec<u8> {
        let mut state = seed;
        (0..len)
            .map(|_| {
                state ^= state << 13;
                state ^= state >> 7;
                state ^= state << 17;
                (state & 0xff) as u8
            })
            .collect()
    }

    /// `magic` followed by 512 pseudo-random bytes, so that the post-magic
    /// window also looks like compressed data (v0.8.15 M-7).
    fn magic_then_random(magic: &[u8], seed: u64) -> Vec<u8> {
        let mut payload = magic.to_vec();
        payload.extend(xorshift_bytes(seed, 512));
        payload
    }

    // ===========================================================
    // Basic dispatchers
    // ===========================================================

    #[tokio::test]
    async fn always_dispatcher_returns_configured_kind() {
        let d = AlwaysDispatcher(CodecKind::CpuZstd);
        assert_eq!(d.pick(b"any input").await, CodecKind::CpuZstd);
    }

    #[tokio::test]
    async fn boxed_dispatcher_works() {
        let d: Box<dyn CodecDispatcher> = Box::new(AlwaysDispatcher(CodecKind::Passthrough));
        assert_eq!(d.pick(b"x").await, CodecKind::Passthrough);
    }

    // ===========================================================
    // Sample-only decisions
    // ===========================================================

    #[tokio::test]
    async fn sampling_short_sample_uses_default() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd);
        assert_eq!(d.pick(b"short").await, CodecKind::CpuZstd);
    }

    #[tokio::test]
    async fn sampling_text_picks_default() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd);
        // ~1.3 KiB of English-like text (low entropy).
        let text = english_text(30);
        assert_eq!(d.pick(&text).await, CodecKind::CpuZstd);
    }

    #[tokio::test]
    async fn sampling_random_bytes_picks_passthrough() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd);
        // 4 KiB of high-entropy, roughly uniform pseudo-random bytes.
        let payload = xorshift_bytes(0xfeed_beef_dead_c0de, 4096);
        // Entropy must be above the default threshold (7.5).
        let e = shannon_entropy(&payload);
        assert!(
            e > 7.5,
            "expected high entropy on pseudo-random bytes, got {e}"
        );
        assert_eq!(d.pick(&payload).await, CodecKind::Passthrough);
    }

    #[tokio::test]
    async fn sampling_gzip_magic_picks_passthrough() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd);
        // v0.8.15 M-7: the post-magic window must also look like
        // compressed bytes (high entropy) for passthrough to fire, so the
        // body is pseudo-random rather than a repeated `a`.
        // Magic = gzip signature + DEFLATE method byte.
        let payload = magic_then_random(&[0x1f, 0x8b, 0x08], 0xdead_c0de_feed_beef);
        assert_eq!(d.pick(&payload).await, CodecKind::Passthrough);
    }

    /// v0.8.15 M-7: a user log file starting with `BZh` followed by
    /// English text (low entropy) MUST NOT trigger passthrough — the
    /// pre-M-7 magic-byte check fired on that prefix alone, silently
    /// skipping compression on customer logs that happened to begin
    /// with bzip2's 3-byte magic.
    #[tokio::test]
    async fn sampling_magic_prefix_but_low_entropy_body_compresses() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd);
        let mut payload = b"BZh just a log line\n".to_vec();
        // Append low-entropy English text to fill the sample window.
        payload.extend(english_text(20));
        assert_eq!(d.pick(&payload).await, CodecKind::CpuZstd);
    }

    /// v0.8.16 F-12 / v0.8.17 G-3: a short (<= 48 byte) log line that merely
    /// begins with bzip2's magic must get the default codec — the magic alone
    /// is not trusted on short samples.
    #[tokio::test]
    async fn sampling_short_magic_prefixed_log_uses_default() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd);
        let payload = b"BZh:loglog: service started, all good\n";
        assert!(payload.len() <= 48);
        assert_eq!(d.pick(payload).await, CodecKind::CpuZstd);
    }

    #[tokio::test]
    async fn sampling_png_magic_picks_passthrough() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd);
        // v0.8.15 M-7: real PNG bytes have high entropy after the
        // magic — pseudo-random fill exercises the "magic + post-magic
        // high entropy" branch.
        let payload = magic_then_random(
            &[0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a],
            0xc0de_f00d_dead_face,
        );
        assert_eq!(d.pick(&payload).await, CodecKind::Passthrough);
    }

    #[tokio::test]
    async fn sampling_mp4_ftyp_picks_passthrough() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd);
        // v0.8.15 M-7: same shape — magic at bytes 4..8 plus a
        // high-entropy body after it for the post-magic check.
        let mut header = [0u8; 8];
        header[4..8].copy_from_slice(b"ftyp");
        let payload = magic_then_random(&header, 0x1234_5678_dead_beef);
        assert_eq!(d.pick(&payload).await, CodecKind::Passthrough);
    }

    #[test]
    fn entropy_zero_for_uniform() {
        // A single repeated byte value carries no information.
        let zeros = vec![0u8; 1024];
        assert_eq!(shannon_entropy(&zeros), 0.0);
    }

    // ===========================================================
    // v0.8 #56: GPU auto-detect / size-hint promotion
    // ===========================================================

    #[tokio::test]
    async fn gpu_pref_promotes_large_text_to_nvcomp_zstd() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd).with_gpu_preference(true, 1_048_576);
        let sample = text_sample();
        // 2 MiB total body — past the 1 MiB threshold → GPU promotion.
        let kind = d.pick_with_size_hint(&sample, Some(2 * 1024 * 1024)).await;
        assert_eq!(kind, CodecKind::NvcompZstd);
    }

    #[tokio::test]
    async fn gpu_pref_keeps_small_object_on_cpu() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd).with_gpu_preference(true, 1_048_576);
        let sample = text_sample();
        // 100 KiB total body — under the 1 MiB threshold → GPU upload
        // overhead would exceed compress savings, stay on CPU.
        let kind = d.pick_with_size_hint(&sample, Some(100 * 1024)).await;
        assert_eq!(kind, CodecKind::CpuZstd);
    }

    #[tokio::test]
    async fn gpu_pref_off_keeps_cpu_even_for_large_object() {
        // Default — no `with_gpu_preference` call → prefer_gpu = false.
        let d = SamplingDispatcher::new(CodecKind::CpuZstd);
        let sample = text_sample();
        let kind = d.pick_with_size_hint(&sample, Some(10 * 1024 * 1024)).await;
        assert_eq!(kind, CodecKind::CpuZstd);
    }

    #[tokio::test]
    async fn gpu_pref_does_not_override_passthrough_on_high_entropy() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd).with_gpu_preference(true, 1_048_576);
        // High-entropy pseudo-random payload → entropy filter wins,
        // returns Passthrough; GPU promotion is skipped because
        // already-compressed data won't compress further on GPU either.
        let payload = xorshift_bytes(0xfeed_beef_dead_c0de, 4096);
        let kind = d.pick_with_size_hint(&payload, Some(8 * 1024 * 1024)).await;
        assert_eq!(kind, CodecKind::Passthrough);
    }

    #[tokio::test]
    async fn gpu_pref_with_no_size_hint_stays_conservative() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd).with_gpu_preference(true, 1_048_576);
        let sample = text_sample();
        // Chunked transfer: caller has no Content-Length, so total_size =
        // None. We can't safely commit to GPU because the body might be
        // tiny — stay on CPU.
        let kind = d.pick_with_size_hint(&sample, None).await;
        assert_eq!(kind, CodecKind::CpuZstd);
    }

    // ===========================================================
    // v0.8.12 #125: columnar-integer detection + Bitcomp routing
    // ===========================================================

    /// 4 KiB of u32 LE monotonic counts (1024 values; postings / sorted
    /// ids). The low byte cycles through 0..=255, the second byte only takes
    /// the values 0..=3, and the two high bytes stay at 0 — exactly the
    /// per-position-entropy signature `looks_columnar_integer` is built to
    /// catch.
    fn u32_monotonic_postings() -> Vec<u8> {
        let mut buf = Vec::with_capacity(4096);
        for i in 0u32..1024 {
            buf.extend_from_slice(&i.to_le_bytes());
        }
        buf
    }

    /// 4 KiB of u64 LE monotonic timestamps (512 values of Unix epoch
    /// nanos, 137 ns apart). The total drift is about 70 µs, so only the low
    /// bytes change across the sample while the high bytes stay constant.
    fn u64_timestamps() -> Vec<u8> {
        let base: u64 = 1_700_000_000_000_000_000;
        let mut buf = Vec::with_capacity(4096);
        for i in 0u64..512 {
            buf.extend_from_slice(&(base + i * 137).to_le_bytes());
        }
        buf
    }

    #[test]
    fn columnar_detect_flags_u32_postings() {
        assert!(looks_columnar_integer(&u32_monotonic_postings()));
    }

    #[test]
    fn columnar_detect_flags_u64_timestamps() {
        assert!(looks_columnar_integer(&u64_timestamps()));
    }

    #[test]
    fn columnar_detect_rejects_english_text() {
        let text = english_text(50);
        // English text has reasonably uniform per-stride-position
        // entropy — no single byte position dominates the entropy.
        assert!(!looks_columnar_integer(&text));
    }

    #[test]
    fn columnar_detect_rejects_random_bytes() {
        let payload = xorshift_bytes(0xa5a5_5a5a_dead_beef, 4096);
        assert!(!looks_columnar_integer(&payload));
    }

    #[test]
    fn columnar_detect_rejects_too_small_sample() {
        // 256 bytes < COLUMNAR_MIN_SAMPLE (512) — must short-circuit
        // to `false` so we never flag a tiny request as columnar.
        let mut buf = Vec::with_capacity(256);
        for i in 0u32..64 {
            buf.extend_from_slice(&i.to_le_bytes());
        }
        assert!(!looks_columnar_integer(&buf));
    }

    #[tokio::test]
    async fn gpu_pref_columnar_promotes_postings_to_bitcomp() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd)
            .with_gpu_preference(true, 1_048_576)
            .with_columnar_gpu_preference(true);
        let sample = u32_monotonic_postings();
        let kind = d.pick_with_size_hint(&sample, Some(8 * 1024 * 1024)).await;
        assert_eq!(kind, CodecKind::NvcompBitcomp);
    }

    #[tokio::test]
    async fn gpu_pref_columnar_promotes_timestamps_to_bitcomp() {
        let d = SamplingDispatcher::new(CodecKind::CpuZstd)
            .with_gpu_preference(true, 1_048_576)
            .with_columnar_gpu_preference(true);
        let sample = u64_timestamps();
        let kind = d.pick_with_size_hint(&sample, Some(4 * 1024 * 1024)).await;
        assert_eq!(kind, CodecKind::NvcompBitcomp);
    }

    #[tokio::test]
    async fn gpu_pref_columnar_falls_through_to_zstd_on_text() {
        // Columnar detector rejects text → Bitcomp routing skipped,
        // existing NvcompZstd promotion (#56) takes over.
        let d = SamplingDispatcher::new(CodecKind::CpuZstd)
            .with_gpu_preference(true, 1_048_576)
            .with_columnar_gpu_preference(true);
        let sample = text_sample();
        let kind = d.pick_with_size_hint(&sample, Some(2 * 1024 * 1024)).await;
        assert_eq!(kind, CodecKind::NvcompZstd);
    }

    #[tokio::test]
    async fn gpu_pref_columnar_off_keeps_postings_on_zstd() {
        // Default — `with_columnar_gpu_preference` NOT called → the
        // README's "manual `--codec nvcomp-bitcomp`" path is the
        // only way to reach Bitcomp.
        let d = SamplingDispatcher::new(CodecKind::CpuZstd).with_gpu_preference(true, 1_048_576);
        let sample = u32_monotonic_postings();
        let kind = d.pick_with_size_hint(&sample, Some(8 * 1024 * 1024)).await;
        assert_eq!(kind, CodecKind::NvcompZstd);
    }

    #[tokio::test]
    async fn gpu_pref_columnar_respects_size_threshold() {
        // Columnar payload but under the gpu_min_bytes threshold →
        // GPU upload overhead would exceed the compress gain, stay
        // on CpuZstd. The Bitcomp branch must not bypass the size
        // gate.
        let d = SamplingDispatcher::new(CodecKind::CpuZstd)
            .with_gpu_preference(true, 1_048_576)
            .with_columnar_gpu_preference(true);
        let sample = u32_monotonic_postings();
        let kind = d.pick_with_size_hint(&sample, Some(100 * 1024)).await;
        assert_eq!(kind, CodecKind::CpuZstd);
    }

    #[test]
    fn entropy_full_8_for_each_byte_once() {
        // Every byte value equally often → each has probability 1/256 →
        // entropy = 8 bits. The 0..=255 run is repeated four times
        // (1024 bytes) so the fixture is not a "short" sample; repetition
        // leaves the entropy unchanged.
        let payload: Vec<u8> = (0..=255u8).cycle().take(1024).collect();
        let e = shannon_entropy(&payload);
        assert!((e - 8.0).abs() < 0.0001, "expected 8.0, got {e}");
    }
}
795}