Skip to main content

oxiphysics_io/
physics_binary.rs

1// Copyright 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! Custom binary format for physics field data.
5//!
6//! Provides a compact binary representation with a fixed header, run-length
7//! encoding (RLE) for float sequences, CRC32 checksums, and integrity
8//! verification.
9
10// ── Header ────────────────────────────────────────────────────────────────────
11
/// Four-byte signature (`OXPF`) stored at the start of every physics binary
/// field record.
pub const MAGIC: [u8; 4] = [b'O', b'X', b'P', b'F'];

/// Format version written into new headers; `read_binary_field` rejects
/// records carrying any other version.
pub const FORMAT_VERSION: u16 = 1;
17
/// Tag describing the element type of a field; stored in the header as a
/// single byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum FieldDataType {
    /// IEEE 754 double-precision (64-bit) float.
    Float64 = 0,
    /// IEEE 754 single-precision (32-bit) float.
    Float32 = 1,
    /// Signed 32-bit integer.
    Int32 = 2,
}

impl FieldDataType {
    /// Convert a raw `u8` tag into the matching variant.
    ///
    /// Returns `None` when `tag` does not correspond to any variant.
    pub fn from_u8(tag: u8) -> Option<Self> {
        // Every variant, so the lookup is driven by the discriminants above.
        const KNOWN: [FieldDataType; 3] = [
            FieldDataType::Float64,
            FieldDataType::Float32,
            FieldDataType::Int32,
        ];
        KNOWN.iter().copied().find(|&kind| kind as u8 == tag)
    }
}
41
42/// Header for a binary physics field record.
43#[derive(Debug, Clone, PartialEq, Eq)]
44pub struct BinaryHeader {
45    /// Magic identifier (`OXPF`).
46    pub magic: [u8; 4],
47    /// Format version.
48    pub version: u16,
49    /// Element data type.
50    pub data_type: FieldDataType,
51    /// Number of dimensions (1, 2, or 3).
52    pub ndim: u8,
53    /// Dimension sizes (`[nx, ny, nz]`; unused dims are `1`).
54    pub dims: [u32; 3],
55    /// CRC32 checksum of the payload bytes.
56    pub checksum: u32,
57}
58
59impl BinaryHeader {
60    /// Serialised size in bytes.
61    pub const SIZE: usize = 4 + 2 + 1 + 1 + 12 + 4; // 24 bytes
62
63    /// Serialise the header to bytes.
64    pub fn to_bytes(&self) -> [u8; Self::SIZE] {
65        let mut buf = [0u8; Self::SIZE];
66        buf[0..4].copy_from_slice(&self.magic);
67        buf[4..6].copy_from_slice(&self.version.to_le_bytes());
68        buf[6] = self.data_type as u8;
69        buf[7] = self.ndim;
70        buf[8..12].copy_from_slice(&self.dims[0].to_le_bytes());
71        buf[12..16].copy_from_slice(&self.dims[1].to_le_bytes());
72        buf[16..20].copy_from_slice(&self.dims[2].to_le_bytes());
73        buf[20..24].copy_from_slice(&self.checksum.to_le_bytes());
74        buf
75    }
76
77    /// Deserialise a header from a 24-byte slice.
78    pub fn from_bytes(buf: &[u8]) -> Result<Self, String> {
79        if buf.len() < Self::SIZE {
80            return Err(format!(
81                "Header too short: expected {} bytes, got {}",
82                Self::SIZE,
83                buf.len()
84            ));
85        }
86        let magic: [u8; 4] = buf[0..4].try_into().expect("slice length must match");
87        let version = u16::from_le_bytes(buf[4..6].try_into().expect("slice length must match"));
88        let data_type = FieldDataType::from_u8(buf[6])
89            .ok_or_else(|| format!("Unknown data type: {}", buf[6]))?;
90        let ndim = buf[7];
91        let d0 = u32::from_le_bytes(buf[8..12].try_into().expect("slice length must match"));
92        let d1 = u32::from_le_bytes(buf[12..16].try_into().expect("slice length must match"));
93        let d2 = u32::from_le_bytes(buf[16..20].try_into().expect("slice length must match"));
94        let checksum = u32::from_le_bytes(buf[20..24].try_into().expect("slice length must match"));
95        Ok(Self {
96            magic,
97            version,
98            data_type,
99            ndim,
100            dims: [d0, d1, d2],
101            checksum,
102        })
103    }
104
105    /// Total number of elements (`dims[0] * dims[1] * dims[2]`).
106    pub fn element_count(&self) -> usize {
107        self.dims[0] as usize * self.dims[1] as usize * self.dims[2] as usize
108    }
109}
110
111// ── Write / Read ──────────────────────────────────────────────────────────────
112
/// Serialise `data` as one flat buffer of 8-byte little-endian values,
/// preserving element order.
fn f64_slice_to_bytes(data: &[f64]) -> Vec<u8> {
    let mut encoded = Vec::with_capacity(data.len() * std::mem::size_of::<f64>());
    data.iter()
        .for_each(|value| encoded.extend_from_slice(&value.to_le_bytes()));
    encoded
}
121
/// Reinterpret `bytes` as a sequence of little-endian `f64` values.
///
/// Returns `Err` when the buffer length is not a whole number of 8-byte
/// elements.
fn bytes_to_f64_slice(bytes: &[u8]) -> Result<Vec<f64>, String> {
    let words = bytes.chunks_exact(8);
    // A non-empty remainder means the length is not a multiple of 8.
    if !words.remainder().is_empty() {
        return Err(format!(
            "Payload length {} is not a multiple of 8",
            bytes.len()
        ));
    }
    let values = words
        .map(|word| {
            let mut raw = [0u8; 8];
            raw.copy_from_slice(word);
            f64::from_le_bytes(raw)
        })
        .collect();
    Ok(values)
}
138
139/// Write a `f64` field with a binary header.
140///
141/// Returns the complete byte stream: `[header (24 bytes)] + [payload]`.
142/// The header checksum is computed automatically.
143///
144/// * `data` – the field values in row-major order
145/// * `dims` – `[nx, ny, nz]` (set unused dims to 1)
146/// * `ndim` – number of active dimensions (1, 2, or 3)
147pub fn write_binary_field(data: &[f64], dims: [u32; 3], ndim: u8) -> Vec<u8> {
148    let payload = f64_slice_to_bytes(data);
149    let crc = checksum_crc32(&payload);
150    let header = BinaryHeader {
151        magic: MAGIC,
152        version: FORMAT_VERSION,
153        data_type: FieldDataType::Float64,
154        ndim,
155        dims,
156        checksum: crc,
157    };
158    let mut out = Vec::with_capacity(BinaryHeader::SIZE + payload.len());
159    out.extend_from_slice(&header.to_bytes());
160    out.extend_from_slice(&payload);
161    out
162}
163
164/// Read and validate a binary field written by `write_binary_field`.
165///
166/// Returns `(header, data)` on success, or `Err` describing the problem.
167pub fn read_binary_field(bytes: &[u8]) -> Result<(BinaryHeader, Vec<f64>), String> {
168    if bytes.len() < BinaryHeader::SIZE {
169        return Err("Data too short to contain a header".into());
170    }
171    let header = BinaryHeader::from_bytes(&bytes[..BinaryHeader::SIZE])?;
172
173    if header.magic != MAGIC {
174        return Err(format!("Bad magic: {:?}", header.magic));
175    }
176    if header.version != FORMAT_VERSION {
177        return Err(format!("Unsupported version: {}", header.version));
178    }
179
180    let payload = &bytes[BinaryHeader::SIZE..];
181    verify_integrity(payload, header.checksum)?;
182
183    let data = bytes_to_f64_slice(payload)?;
184    let expected = header.element_count();
185    if data.len() != expected {
186        return Err(format!(
187            "Element count mismatch: header says {}, payload has {}",
188            expected,
189            data.len()
190        ));
191    }
192    Ok((header, data))
193}
194
195// ── RLE compression ───────────────────────────────────────────────────────────
196
/// One run of identical values produced by run-length encoding.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct RleRun {
    /// Value shared by every element of the run.
    pub value: f64,
    /// Number of consecutive occurrences of `value`.
    pub count: usize,
}

/// Run-length encode a `f64` slice.
///
/// Consecutive elements with identical bit patterns (compared through
/// `f64::to_bits`) are merged into a single [`RleRun`]. Because the comparison
/// is bitwise, `0.0` and `-0.0` start separate runs, while NaNs with the same
/// bit pattern merge. The encoding is lossless; empty input yields an empty
/// vector.
pub fn compress_rle(data: &[f64]) -> Vec<RleRun> {
    let mut runs: Vec<RleRun> = Vec::new();
    for &value in data {
        match runs.last_mut() {
            // Same bits as the run currently being built: lengthen it.
            Some(open) if open.value.to_bits() == value.to_bits() => open.count += 1,
            // First element, or the value changed: start a new run.
            _ => runs.push(RleRun { value, count: 1 }),
        }
    }
    runs
}
236
237/// Decompress RLE runs back into a `f64` Vec.
238pub fn decompress_rle(runs: &[RleRun]) -> Vec<f64> {
239    let total: usize = runs.iter().map(|r| r.count).sum();
240    let mut out = Vec::with_capacity(total);
241    for run in runs {
242        for _ in 0..run.count {
243            out.push(run.value);
244        }
245    }
246    out
247}
248
249// ── Checksum ──────────────────────────────────────────────────────────────────
250
/// Compute a CRC32 checksum of the given byte slice.
///
/// Uses the standard IEEE 802.3 polynomial (0xEDB88320, reflected), with an
/// initial value of `0xFFFF_FFFF` and a final bitwise inversion. The checksum
/// of empty input is `0`.
pub fn checksum_crc32(data: &[u8]) -> u32 {
    // Byte-indexed lookup table, evaluated once at compile time instead of
    // being rebuilt on every call.
    static TABLE: [u32; 256] = {
        let mut table = [0u32; 256];
        let mut index = 0usize;
        while index < 256 {
            let mut entry = index as u32;
            let mut bit = 0;
            while bit < 8 {
                entry = if entry & 1 != 0 {
                    (entry >> 1) ^ 0xEDB8_8320
                } else {
                    entry >> 1
                };
                bit += 1;
            }
            table[index] = entry;
            index += 1;
        }
        table
    };

    let state = data.iter().fold(0xFFFF_FFFFu32, |crc, &byte| {
        let idx = ((crc ^ u32::from(byte)) & 0xFF) as usize;
        (crc >> 8) ^ TABLE[idx]
    });
    // Final XOR with 0xFFFF_FFFF.
    !state
}
279
280/// Verify that `data` matches the expected `checksum`.
281///
282/// Returns `Ok(())` on success or `Err` with a diagnostic message.
283pub fn verify_integrity(data: &[u8], expected: u32) -> Result<(), String> {
284    let actual = checksum_crc32(data);
285    if actual == expected {
286        Ok(())
287    } else {
288        Err(format!(
289            "Checksum mismatch: expected 0x{:08X}, got 0x{:08X}",
290            expected, actual
291        ))
292    }
293}
294
295// ── Tests ─────────────────────────────────────────────────────────────────────
296
#[cfg(test)]
mod tests {
    use super::*;

    /// Twelve evenly spaced values: 0.0, 1.5, 3.0, …, 16.5.
    fn sample_data() -> Vec<f64> {
        (0..12_i32).map(f64::from).map(|step| step * 1.5).collect()
    }

    /// Build a `Float64` header carrying the current magic and version.
    fn header_with(ndim: u8, dims: [u32; 3], checksum: u32) -> BinaryHeader {
        BinaryHeader {
            magic: MAGIC,
            version: FORMAT_VERSION,
            data_type: FieldDataType::Float64,
            ndim,
            dims,
            checksum,
        }
    }

    // ── Header serialisation ──

    #[test]
    fn test_header_roundtrip() {
        let original = header_with(1, [12, 1, 1], 0xDEAD_BEEF);
        let encoded = original.to_bytes();
        assert_eq!(encoded.len(), BinaryHeader::SIZE);
        let decoded = BinaryHeader::from_bytes(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn test_header_magic() {
        let encoded = header_with(1, [1, 1, 1], 0).to_bytes();
        assert_eq!(&encoded[0..4], b"OXPF");
    }

    #[test]
    fn test_header_from_bytes_too_short() {
        let truncated = [0u8; 10];
        assert!(BinaryHeader::from_bytes(&truncated).is_err());
    }

    #[test]
    fn test_header_from_bytes_bad_data_type() {
        let mut raw = [0u8; BinaryHeader::SIZE];
        raw[6] = 99; // not a valid data type tag
        assert!(BinaryHeader::from_bytes(&raw).is_err());
    }

    #[test]
    fn test_header_element_count_1d() {
        assert_eq!(header_with(1, [10, 1, 1], 0).element_count(), 10);
    }

    #[test]
    fn test_header_element_count_3d() {
        assert_eq!(header_with(3, [4, 5, 6], 0).element_count(), 120);
    }

    // ── Write / read roundtrip ──

    #[test]
    fn test_write_read_roundtrip_1d() {
        let expected = sample_data();
        let encoded = write_binary_field(&expected, [12, 1, 1], 1);
        let (_, decoded) = read_binary_field(&encoded).unwrap();
        assert_eq!(decoded.len(), expected.len());
        assert!(expected
            .iter()
            .zip(&decoded)
            .all(|(want, got)| (want - got).abs() < 1e-15));
    }

    #[test]
    fn test_write_read_roundtrip_3d() {
        let values: Vec<f64> = (0..60_i32).map(f64::from).collect();
        let encoded = write_binary_field(&values, [3, 4, 5], 3);
        let (header, decoded) = read_binary_field(&encoded).unwrap();
        assert_eq!(header.dims, [3, 4, 5]);
        assert_eq!(decoded.len(), 60);
    }

    #[test]
    fn test_read_bad_magic() {
        let mut encoded = write_binary_field(&[1.0, 2.0], [2, 1, 1], 1);
        encoded[0] = b'X'; // break the magic bytes
        assert!(read_binary_field(&encoded).is_err());
    }

    #[test]
    fn test_read_corrupted_payload() {
        let mut encoded = write_binary_field(&[1.0, 2.0], [2, 1, 1], 1);
        // Invert the final payload byte so the stored checksum no longer matches.
        if let Some(last_byte) = encoded.last_mut() {
            *last_byte ^= 0xFF;
        }
        assert!(read_binary_field(&encoded).is_err());
    }

    #[test]
    fn test_read_too_short() {
        let truncated = [0u8; 5];
        assert!(read_binary_field(&truncated).is_err());
    }

    #[test]
    fn test_write_read_empty() {
        let encoded = write_binary_field(&[], [0, 1, 1], 1);
        let (header, decoded) = read_binary_field(&encoded).unwrap();
        assert_eq!(header.dims[0], 0);
        assert!(decoded.is_empty());
    }

    // ── RLE compress / decompress ──

    #[test]
    fn test_rle_basic() {
        let runs = compress_rle(&[1.0, 1.0, 1.0, 2.0, 2.0, 3.0]);
        assert_eq!(runs.len(), 3);
        assert_eq!(runs[0].count, 3);
        assert_eq!(runs[1].count, 2);
        assert_eq!(runs[2].count, 1);
    }

    #[test]
    fn test_rle_empty() {
        let runs = compress_rle(&[]);
        assert!(runs.is_empty());
    }

    #[test]
    fn test_rle_single_element() {
        let runs = compress_rle(&[42.0]);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].count, 1);
    }

    #[test]
    fn test_rle_no_repetition() {
        let runs = compress_rle(&[1.0, 2.0, 3.0]);
        assert_eq!(runs.len(), 3);
        assert!(runs.iter().all(|run| run.count == 1));
    }

    #[test]
    fn test_rle_roundtrip() {
        let original = vec![0.0, 0.0, 1.0, 2.0, 2.0, 2.0, 3.0];
        let restored = decompress_rle(&compress_rle(&original));
        assert_eq!(restored, original);
    }

    #[test]
    fn test_rle_all_same() {
        let original = vec![5.0; 100];
        let runs = compress_rle(&original);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].count, 100);
        assert_eq!(decompress_rle(&runs), original);
    }

    #[test]
    fn test_rle_decompress_empty() {
        let expanded = decompress_rle(&[]);
        assert!(expanded.is_empty());
    }

    // ── CRC32 checksum ──

    #[test]
    fn test_checksum_known_value() {
        // Standard CRC32 check value: b"123456789" hashes to 0xCBF43926.
        assert_eq!(checksum_crc32(b"123456789"), 0xCBF4_3926);
    }

    #[test]
    fn test_checksum_empty() {
        assert_eq!(checksum_crc32(b""), 0x0000_0000);
    }

    #[test]
    fn test_checksum_single_byte() {
        assert_ne!(checksum_crc32(b"A"), checksum_crc32(b"B"));
    }

    #[test]
    fn test_checksum_deterministic() {
        let input = b"physics_data_12345";
        let first = checksum_crc32(input);
        let second = checksum_crc32(input);
        assert_eq!(first, second);
    }

    // ── verify_integrity ──

    #[test]
    fn test_verify_integrity_ok() {
        let input = b"hello";
        assert!(verify_integrity(input, checksum_crc32(input)).is_ok());
    }

    #[test]
    fn test_verify_integrity_fail() {
        assert!(verify_integrity(b"hello", 0xDEAD_BEEF).is_err());
    }

    // ── FieldDataType ──

    #[test]
    fn test_field_data_type_from_u8() {
        let cases = [
            (0u8, Some(FieldDataType::Float64)),
            (1, Some(FieldDataType::Float32)),
            (2, Some(FieldDataType::Int32)),
            (99, None),
        ];
        for (tag, expected) in cases.iter() {
            assert_eq!(FieldDataType::from_u8(*tag), *expected);
        }
    }

    // ── Integration: write, read back, then RLE-encode and decode ──

    #[test]
    fn test_write_read_with_rle_consistency() {
        let original = vec![0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0];
        let encoded = write_binary_field(&original, [10, 1, 1], 1);
        let (_, decoded) = read_binary_field(&encoded).unwrap();

        // Run the recovered values through RLE and back again.
        let restored = decompress_rle(&compress_rle(&decoded));
        assert_eq!(restored, original);
    }
}