Skip to main content

flow_fcs_compress/codec/
mod.rs

//! Codec abstraction. Each [`ColumnCodec`] encodes one chunk of a single FCS
//! parameter (column) into bytes, and decodes those bytes into a caller-provided
//! `&mut [f32]` buffer — no intermediate `Vec<f32>` allocations.
4
5use crate::error::Result;
6
7pub mod adc_bitpack;
8pub mod auto;
9pub mod log_quant;
10pub mod lossless_f32;
11#[cfg(feature = "pco-backend")]
12pub mod lossless_f32_pco;
13#[cfg(feature = "lz4-baseline")]
14pub mod lz4_baseline;
15
/// Identifier written to disk for each codec.
///
/// The numeric values are part of the file format: once assigned, a wire
/// number MUST NOT be reused or renumbered.
#[repr(u16)]
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum CodecId {
    /// Mode A — lossless f32: byte-stream-split planes compressed with zstd.
    LosslessF32BssZstd = 0x0001,
    /// Mode A, alternative backend — lossless f32 through pco. (M3+)
    LosslessF32Pco = 0x0002,
    /// Mode B — quantize via `$PnB`/`$PnR`, then fastlanes bitpack.
    /// Lossless within ADC. (M3)
    AdcBitpack = 0x0010,
    /// Mode C — biexp transform, fixed-point quantize, then bitpack. Lossy. (M4)
    LogQuantization = 0x0020,
    /// Baseline — raw little-endian f32 bytes through zstd; a comparison codec.
    RawZstd = 0x00F0,
    /// Baseline — raw little-endian f32 bytes, uncompressed (round-trip sanity).
    RawNone = 0x00FF,
}
34
35impl CodecId {
36    pub fn from_wire(value: u16) -> Option<Self> {
37        match value {
38            0x0001 => Some(Self::LosslessF32BssZstd),
39            0x0002 => Some(Self::LosslessF32Pco),
40            0x0010 => Some(Self::AdcBitpack),
41            0x0020 => Some(Self::LogQuantization),
42            0x00F0 => Some(Self::RawZstd),
43            0x00FF => Some(Self::RawNone),
44            _ => None,
45        }
46    }
47
48    pub fn to_wire(self) -> u16 {
49        self as u16
50    }
51}
52
/// Channel-level parameters taken from FCS keywords.
///
/// These are passed with every encode/decode call instead of being stored in
/// a codec instance, which lets one codec object serve many channels.
#[derive(Clone, Debug)]
pub struct ChannelParams {
    /// `$PnN` — the channel's short name. Used for logs and diagnostics only;
    /// it does not influence codec choice.
    pub name: String,
    /// `$PnB` — bits per parameter as stored on disk, which is not
    /// necessarily the ADC bit depth.
    pub stored_bits: u8,
    /// `$PnR` — parameter range (per the FCS spec: max value + 1).
    pub range: u32,
    /// `$PnE` as `(decades, offset)`. Linear scaling is `(0.0, 0.0)`.
    pub log_decades: (f32, f32),
    /// The true ADC bit depth when it is known. Mode B uses it and falls back
    /// to `ceil(log2(range))` otherwise.
    pub adc_bits: Option<u8>,
    /// Set when the channel may hold negative values
    /// (post-compensation/unmixing).
    pub signed: bool,
}
71
72impl ChannelParams {
73    pub fn linear_unsigned(name: impl Into<String>, range: u32) -> Self {
74        Self {
75            name: name.into(),
76            stored_bits: 32,
77            range,
78            log_decades: (0.0, 0.0),
79            adc_bits: None,
80            signed: false,
81        }
82    }
83
84    pub fn is_log(&self) -> bool {
85        self.log_decades.0 > 0.0
86    }
87}
88
/// Figures reported by an encode call.
///
/// Intended for benchmarking and for filling in chunk-index metadata in the
/// `.fcz` container.
#[derive(Clone, Copy, Debug, Default)]
pub struct EncodeStats {
    /// Event count of the encoded input.
    pub input_events: u32,
    /// Size of the input, in bytes.
    pub input_bytes: u64,
    /// Size of the produced payload, in bytes.
    pub output_bytes: u64,
}
97
98impl EncodeStats {
99    pub fn ratio(&self) -> f64 {
100        if self.output_bytes == 0 {
101            0.0
102        } else {
103            self.input_bytes as f64 / self.output_bytes as f64
104        }
105    }
106}
107
108/// Per-chunk column codec. Implementors MUST be stateless w.r.t. encode/decode
109/// (any per-call state belongs in the payload or in [`ChannelParams`]).
110pub trait ColumnCodec: Send + Sync {
111    fn id(&self) -> CodecId;
112
113    /// Encode `input` events for one column into `out`. Returns stats describing
114    /// the produced payload. `out` is appended to, not replaced.
115    fn encode_chunk(
116        &self,
117        input: &[f32],
118        params: &ChannelParams,
119        out: &mut Vec<u8>,
120    ) -> Result<EncodeStats>;
121
122    /// Decode `payload` into `out`. `out.len()` MUST equal the original event
123    /// count. Implementations should treat `out` as uninitialized and fully
124    /// overwrite every element.
125    fn decode_chunk(
126        &self,
127        payload: &[u8],
128        params: &ChannelParams,
129        out: &mut [f32],
130    ) -> Result<()>;
131}