Skip to main content

s4_codec/
lib.rs

//! S4 圧縮 codec layer。バックエンドを差し替え可能にする中立 trait を提供する。
//!
//! ## 採用 backend (2026-05 検討)
//!
//! - **nvCOMP** (NVIDIA proprietary、要 license 確認): Bitcomp / gANS / zstd-GPU
//! - **DietGPU** (Meta, MIT): ANS-only、license clean な fallback
//! - **CPU zstd**: GPU 無し環境向け究極の fallback / test bed
8
9use std::str::FromStr;
10
11use bytes::Bytes;
12use serde::{Deserialize, Serialize};
13use thiserror::Error;
14
15pub mod cpu_gzip;
16pub mod cpu_zstd;
17pub mod dietgpu;
18pub mod dispatcher;
19#[cfg(feature = "nvcomp-gpu")]
20mod ferro_compress;
21pub mod index;
22pub mod multipart;
23pub mod nvcomp;
24pub mod passthrough;
25pub mod registry;
26
27pub use registry::CodecRegistry;
28
29/// 圧縮 codec の種類 (manifest に記録、後段の decompress で codec を確定するために使う)
30#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
31#[serde(rename_all = "kebab-case")]
32pub enum CodecKind {
33    Passthrough,
34    NvcompBitcomp,
35    NvcompGans,
36    NvcompZstd,
37    DietGpuAns,
38    CpuZstd,
39    /// nvCOMP GDeflate (v0.2 #9). DEFLATE-family GPU codec; output bytes are
40    /// NOT gzip-compatible at the wire level (different framing) but the
41    /// algorithm-level format aligns with stock DEFLATE/zlib decoders given
42    /// the right wrapper.
43    NvcompGDeflate,
44    /// CPU gzip via `flate2` (v0.4 #26). Produces RFC 1952 gzip output that
45    /// any standard `gunzip`-aware client can decode without knowing about
46    /// S4. Pair with the `Content-Encoding: gzip` header to serve to a
47    /// browser / curl that's never heard of S4.
48    CpuGzip,
49}
50
51impl CodecKind {
52    pub fn as_str(self) -> &'static str {
53        match self {
54            Self::Passthrough => "passthrough",
55            Self::NvcompBitcomp => "nvcomp-bitcomp",
56            Self::NvcompGans => "nvcomp-gans",
57            Self::NvcompZstd => "nvcomp-zstd",
58            Self::DietGpuAns => "dietgpu-ans",
59            Self::CpuZstd => "cpu-zstd",
60            Self::NvcompGDeflate => "nvcomp-gdeflate",
61            Self::CpuGzip => "cpu-gzip",
62        }
63    }
64
65    /// 安定 numeric ID。`s4-codec/multipart.rs` の frame header に書き込む際に使う。
66    /// ⚠️ **この値は wire format の一部** — 既存値の変更禁止 (新 codec は新 ID を割当)。
67    pub fn id(self) -> u32 {
68        match self {
69            Self::Passthrough => 0,
70            Self::CpuZstd => 1,
71            Self::NvcompZstd => 2,
72            Self::NvcompBitcomp => 3,
73            Self::NvcompGans => 4,
74            Self::DietGpuAns => 5,
75            Self::NvcompGDeflate => 6,
76            Self::CpuGzip => 7,
77        }
78    }
79
80    pub fn from_id(id: u32) -> Option<Self> {
81        Some(match id {
82            0 => Self::Passthrough,
83            1 => Self::CpuZstd,
84            2 => Self::NvcompZstd,
85            3 => Self::NvcompBitcomp,
86            4 => Self::NvcompGans,
87            5 => Self::DietGpuAns,
88            6 => Self::NvcompGDeflate,
89            7 => Self::CpuGzip,
90            _ => return None,
91        })
92    }
93}
94
95#[derive(Debug, thiserror::Error)]
96#[error("unknown codec kind: {0}")]
97pub struct ParseCodecKindError(String);
98
99impl FromStr for CodecKind {
100    type Err = ParseCodecKindError;
101    fn from_str(s: &str) -> Result<Self, Self::Err> {
102        Ok(match s {
103            "passthrough" => Self::Passthrough,
104            "nvcomp-bitcomp" => Self::NvcompBitcomp,
105            "nvcomp-gans" => Self::NvcompGans,
106            "nvcomp-zstd" => Self::NvcompZstd,
107            "dietgpu-ans" => Self::DietGpuAns,
108            "cpu-zstd" => Self::CpuZstd,
109            "nvcomp-gdeflate" => Self::NvcompGDeflate,
110            "cpu-gzip" => Self::CpuGzip,
111            other => return Err(ParseCodecKindError(other.into())),
112        })
113    }
114}
115
116/// 圧縮済 chunk のメタ情報。S3 オブジェクトの metadata に格納される。
117#[derive(Debug, Clone, Serialize, Deserialize)]
118pub struct ChunkManifest {
119    pub codec: CodecKind,
120    pub original_size: u64,
121    pub compressed_size: u64,
122    pub crc32c: u32,
123}
124
125/// codec 操作のエラー型。`anyhow::Error` ではなく専用型にすることで、上位 (S4Service) が
126/// HTTP エラーコードを意味的に出し分けやすくする。
127#[derive(Debug, Error)]
128pub enum CodecError {
129    #[error("codec mismatch: expected {expected:?}, got {got:?}")]
130    CodecMismatch { expected: CodecKind, got: CodecKind },
131
132    #[error("crc32c mismatch (chunk corruption?): expected {expected:#010x}, got {got:#010x}")]
133    CrcMismatch { expected: u32, got: u32 },
134
135    #[error("compressed size mismatch: manifest says {expected} bytes, payload is {got} bytes")]
136    SizeMismatch { expected: u64, got: u64 },
137
138    #[error("compression backend error: {0}")]
139    Backend(#[from] anyhow::Error),
140
141    #[error("io error: {0}")]
142    Io(#[from] std::io::Error),
143
144    #[error("blocking-task join error: {0}")]
145    Join(#[from] tokio::task::JoinError),
146
147    #[error("codec {0:?} is not registered in this CodecRegistry")]
148    UnregisteredCodec(CodecKind),
149}
150
151/// pluggable な圧縮 backend trait。
152///
153/// すべて async — GPU codec は CUDA stream に await でき、CPU codec は
154/// `spawn_blocking` で別スレッドへ逃がす。
155#[async_trait::async_trait]
156pub trait Codec: Send + Sync {
157    /// この実装が提供する codec の種類
158    fn kind(&self) -> CodecKind;
159
160    /// 圧縮: 入力 bytes → 圧縮済 bytes + manifest
161    async fn compress(&self, input: Bytes) -> Result<(Bytes, ChunkManifest), CodecError>;
162
163    /// 解凍: 圧縮済 bytes + manifest → 元の bytes
164    async fn decompress(&self, input: Bytes, manifest: &ChunkManifest)
165    -> Result<Bytes, CodecError>;
166}
167
168pub use dispatcher::CodecDispatcher;