// flow_fcs_compress/codec/lossless_f32.rs

//! Mode A: lossless f32 codecs.
//!
//! The default backend is byte-stream-split followed by zstd. Rationale: unmixed
//! flow cytometry channels carry full f32 entropy in the mantissa (so dictionary
//! coders like ALP underperform), but the exponent and high-mantissa byte planes
//! have very narrow distributions across a single channel. Splitting into byte
//! planes lets zstd's match-finder exploit that narrowness directly.
8
9use std::io::{Cursor, Write};
10
11use crate::codec::{ChannelParams, CodecId, ColumnCodec, EncodeStats};
12use crate::error::Result;
13use crate::transform::byte_stream_split::{split_f32_le, unsplit_f32_le};
14
/// Default Mode A codec: the samples are byte-stream-split and the resulting
/// planes are compressed with zstd.
#[derive(Debug, Clone)]
pub struct BssZstd {
    /// Compression level handed to the zstd encoder when a chunk is encoded.
    pub level: i32,
}

impl BssZstd {
    /// Builds a codec that compresses at the given zstd `level`.
    pub fn new(level: i32) -> Self {
        BssZstd { level }
    }
}

impl Default for BssZstd {
    /// Returns a codec configured with compression level 3.
    fn default() -> Self {
        Self::new(3)
    }
}
32
33impl ColumnCodec for BssZstd {
34    fn id(&self) -> CodecId {
35        CodecId::LosslessF32BssZstd
36    }
37
38    fn encode_chunk(
39        &self,
40        input: &[f32],
41        _params: &ChannelParams,
42        out: &mut Vec<u8>,
43    ) -> Result<EncodeStats> {
44        let mut planes = Vec::with_capacity(input.len() * 4);
45        split_f32_le(input, &mut planes);
46
47        let start = out.len();
48        let mut encoder = zstd::Encoder::new(out, self.level)?;
49        encoder.write_all(&planes)?;
50        let out = encoder.finish()?;
51
52        Ok(EncodeStats {
53            input_events: input.len() as u32,
54            input_bytes: (input.len() * 4) as u64,
55            output_bytes: (out.len() - start) as u64,
56        })
57    }
58
59    fn decode_chunk(
60        &self,
61        payload: &[u8],
62        _params: &ChannelParams,
63        out: &mut [f32],
64    ) -> Result<()> {
65        let n = out.len();
66        let mut planes = Vec::with_capacity(n * 4);
67        let mut decoder = zstd::Decoder::new(Cursor::new(payload))?;
68        std::io::copy(&mut decoder, &mut planes)?;
69        unsplit_f32_le(&planes, out);
70        Ok(())
71    }
72}
73
/// Baseline: raw f32 LE bytes through zstd. Useful for ratio comparisons and as
/// a fallback when a channel turns out to compress better without BSS.
#[derive(Debug, Clone)]
pub struct RawZstd {
    /// Compression level handed to the zstd encoder when a chunk is encoded.
    pub level: i32,
}

impl Default for RawZstd {
    /// Returns a codec configured with compression level 3.
    fn default() -> Self {
        Self { level: 3 }
    }
}

impl RawZstd {
    /// Builds a codec that compresses at the given zstd `level`.
    ///
    /// Mirrors [`BssZstd::new`] so both zstd-backed codecs are constructed
    /// the same way.
    pub fn new(level: i32) -> Self {
        Self { level }
    }
}
86
87impl ColumnCodec for RawZstd {
88    fn id(&self) -> CodecId {
89        CodecId::RawZstd
90    }
91
92    fn encode_chunk(
93        &self,
94        input: &[f32],
95        _params: &ChannelParams,
96        out: &mut Vec<u8>,
97    ) -> Result<EncodeStats> {
98        let bytes = bytemuck::cast_slice::<f32, u8>(input);
99        let start = out.len();
100        let mut encoder = zstd::Encoder::new(out, self.level)?;
101        encoder.write_all(bytes)?;
102        let out = encoder.finish()?;
103        Ok(EncodeStats {
104            input_events: input.len() as u32,
105            input_bytes: bytes.len() as u64,
106            output_bytes: (out.len() - start) as u64,
107        })
108    }
109
110    fn decode_chunk(
111        &self,
112        payload: &[u8],
113        _params: &ChannelParams,
114        out: &mut [f32],
115    ) -> Result<()> {
116        let dst = bytemuck::cast_slice_mut::<f32, u8>(out);
117        let mut decoder = zstd::Decoder::new(Cursor::new(payload))?;
118        let mut written = 0;
119        let mut tmp = [0u8; 4096];
120        loop {
121            let n = std::io::Read::read(&mut decoder, &mut tmp)?;
122            if n == 0 {
123                break;
124            }
125            dst[written..written + n].copy_from_slice(&tmp[..n]);
126            written += n;
127        }
128        Ok(())
129    }
130}
131
132/// Baseline: raw f32 LE bytes, no compression. Useful for round-trip sanity tests.
133#[derive(Debug, Clone, Default)]
134pub struct RawNone;
135
136impl ColumnCodec for RawNone {
137    fn id(&self) -> CodecId {
138        CodecId::RawNone
139    }
140
141    fn encode_chunk(
142        &self,
143        input: &[f32],
144        _params: &ChannelParams,
145        out: &mut Vec<u8>,
146    ) -> Result<EncodeStats> {
147        let bytes = bytemuck::cast_slice::<f32, u8>(input);
148        out.extend_from_slice(bytes);
149        Ok(EncodeStats {
150            input_events: input.len() as u32,
151            input_bytes: bytes.len() as u64,
152            output_bytes: bytes.len() as u64,
153        })
154    }
155
156    fn decode_chunk(
157        &self,
158        payload: &[u8],
159        _params: &ChannelParams,
160        out: &mut [f32],
161    ) -> Result<()> {
162        let dst = bytemuck::cast_slice_mut::<f32, u8>(out);
163        dst.copy_from_slice(payload);
164        Ok(())
165    }
166}
167
#[cfg(test)]
mod tests {
    use super::*;

    /// Channel parameters shared by every test in this module.
    fn linear_params() -> ChannelParams {
        ChannelParams::linear_unsigned("FSC-A", 262_144)
    }

    /// Deterministically generates `n` samples from `seed`.
    fn synthesize_channel(n: usize, seed: u64) -> Vec<f32> {
        // mix of small positives, near-zero noise, and a few outliers — typical
        // post-compensation channel shape.
        let mut state = seed;
        (0..n)
            .map(|i| {
                // 64-bit LCG step; the high 32 bits feed the uniform sample.
                state = state
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                let u = ((state >> 32) as u32) as f32 / u32::MAX as f32;
                let base = (i as f32) * 0.001;
                let noise = (u - 0.5) * 50.0;
                let outlier = if i % 997 == 0 { 100_000.0 } else { 0.0 };
                base + noise + outlier
            })
            .collect()
    }

    /// Asserts that two sample slices have the same length and identical bit
    /// patterns. Float `==` would treat `0.0` and `-0.0` as equal and can never
    /// match NaN, so it is too weak for a lossless check.
    fn assert_bit_identical(expected: &[f32], actual: &[f32]) {
        assert_eq!(expected.len(), actual.len(), "decoded length differs");
        for (i, (a, b)) in expected.iter().zip(actual.iter()).enumerate() {
            assert_eq!(
                a.to_bits(),
                b.to_bits(),
                "lossless requirement violated at index {}",
                i
            );
        }
    }

    #[test]
    fn bss_zstd_roundtrips() {
        let codec = BssZstd::default();
        let params = linear_params();
        let input = synthesize_channel(8192, 0xCAFEBABE);

        let mut payload = Vec::new();
        let stats = codec.encode_chunk(&input, &params, &mut payload).unwrap();
        assert_eq!(stats.input_events, 8192);
        assert_eq!(stats.input_bytes, 8192 * 4);
        assert!(stats.output_bytes > 0);
        assert_eq!(stats.output_bytes, payload.len() as u64);

        let mut out = vec![0.0f32; input.len()];
        codec.decode_chunk(&payload, &params, &mut out).unwrap();
        assert_bit_identical(&input, &out);
    }

    #[test]
    fn raw_zstd_roundtrips() {
        let codec = RawZstd::default();
        let params = linear_params();
        let input = synthesize_channel(4096, 0xBADF00D);

        let mut payload = Vec::new();
        let stats = codec.encode_chunk(&input, &params, &mut payload).unwrap();
        assert_eq!(stats.input_events, 4096);
        assert_eq!(stats.input_bytes, 4096 * 4);
        assert_eq!(stats.output_bytes, payload.len() as u64);

        let mut out = vec![0.0f32; input.len()];
        codec.decode_chunk(&payload, &params, &mut out).unwrap();
        assert_bit_identical(&input, &out);
    }

    #[test]
    fn raw_none_roundtrips() {
        let codec = RawNone;
        let params = linear_params();
        let input = synthesize_channel(1024, 1);

        let mut payload = Vec::new();
        let stats = codec.encode_chunk(&input, &params, &mut payload).unwrap();
        assert_eq!(payload.len(), input.len() * 4);
        assert_eq!(stats.input_events, 1024);
        assert_eq!(stats.output_bytes, stats.input_bytes);

        let mut out = vec![0.0f32; input.len()];
        codec.decode_chunk(&payload, &params, &mut out).unwrap();
        assert_bit_identical(&input, &out);
    }

    #[test]
    fn bss_beats_raw_on_smooth_data() {
        let params = linear_params();
        // smooth ramp — BSS should compress dramatically better than zstd-on-interleaved
        let input: Vec<f32> = (0..8192).map(|i| (i as f32) * 0.25).collect();

        let mut bss = Vec::new();
        BssZstd::default()
            .encode_chunk(&input, &params, &mut bss)
            .unwrap();
        let mut raw = Vec::new();
        RawZstd::default()
            .encode_chunk(&input, &params, &mut raw)
            .unwrap();

        assert!(
            bss.len() < raw.len(),
            "BSS+zstd ({} bytes) should beat raw+zstd ({} bytes) on smooth data",
            bss.len(),
            raw.len(),
        );
    }
}