Skip to main content

numcodecs_ebcc/
lib.rs

1//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs]
2//!
3//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main
4//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain
5//!
6//! [MSRV]: https://img.shields.io/badge/MSRV-1.87.0-blue
7//! [repo]: https://github.com/juntyr/numcodecs-rs
8//!
9//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-ebcc
10//! [crates.io]: https://crates.io/crates/numcodecs-ebcc
11//!
12//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-ebcc
13//! [docs.rs]: https://docs.rs/numcodecs-ebcc/
14//!
15//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue
16//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_ebcc
17//!
18//! EBCC codec implementation for the [`numcodecs`] API.
19
20#![allow(clippy::multiple_crate_versions)] // embedded-io
21
22#[cfg(test)]
23use ::serde_json as _;
24
25use std::borrow::Cow;
26
27use ndarray::{Array, Array1, ArrayBase, ArrayViewMut, Axis, Data, DataMut, Dimension, IxDyn};
28use num_traits::Float;
29use numcodecs::{
30    AnyArray, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec,
31    StaticCodecConfig, StaticCodecVersion,
32};
33use schemars::{JsonSchema, Schema, SchemaGenerator, json_schema};
34use serde::{Deserialize, Deserializer, Serialize, Serializer};
35use thiserror::Error;
36
/// Encoding format version of the EBCC codec.
///
/// Used both as the `_version` field of the codec configuration and as the
/// `version` field of the header at the start of the encoded data.
type EbccCodecVersion = StaticCodecVersion<0, 1, 0>;
38
/// Codec providing compression using EBCC.
///
/// EBCC combines JPEG2000 compression with error-bounded residual compression.
///
/// Arrays that are higher-dimensional than 3D are encoded by compressing each
/// 3D slice with EBCC independently. Specifically, the array's shape is
/// interpreted as `[.., depth, height, width]`. If you want to compress 3D
/// slices along three different axes, you can swizzle the array axes
/// beforehand.
///
/// Only `f32` arrays can be encoded, and the last two dimensions of a
/// non-empty array must each be at least 32.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
// NOTE(review): unknown fields are only denied in the generated schema,
// presumably because serde does not support `deny_unknown_fields` together
// with the flattened `residual` field - confirm against the serde docs
#[schemars(deny_unknown_fields)]
pub struct EbccCodec {
    /// EBCC residual compression
    ///
    /// Flattened into the codec configuration, so the `residual` tag (and the
    /// `error` bound, if any) appear next to `base_cr`.
    #[serde(flatten)]
    pub residual: EbccResidualType,
    /// JPEG2000 positive base compression ratio
    pub base_cr: Positive<f32>,
    /// The codec's encoding format version. Do not provide this parameter explicitly.
    #[serde(default, rename = "_version")]
    pub version: EbccCodecVersion,
}
60
61/// Residual compression types supported by EBCC.
62#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, JsonSchema)]
63#[serde(tag = "residual")]
64#[serde(deny_unknown_fields)]
65pub enum EbccResidualType {
66    #[serde(rename = "jpeg2000-only")]
67    /// No residual compression - base JPEG2000 only
68    Jpeg2000Only,
69    #[serde(rename = "absolute")]
70    /// Residual compression with absolute maximum error bound
71    AbsoluteError {
72        /// The positive maximum absolute error bound
73        error: Positive<f32>,
74    },
75    #[serde(rename = "relative")]
76    /// Residual compression with relative error bound
77    RelativeError {
78        /// The positive maximum relative error bound
79        error: Positive<f32>,
80    },
81}
82
83impl Codec for EbccCodec {
84    type Error = EbccCodecError;
85
86    fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
87        match data {
88            AnyCowArray::F32(data) => Ok(AnyArray::U8(
89                Array1::from(compress(data, self.residual, self.base_cr)?).into_dyn(),
90            )),
91            encoded => Err(EbccCodecError::UnsupportedDtype(encoded.dtype())),
92        }
93    }
94
95    fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
96        let AnyCowArray::U8(encoded) = encoded else {
97            return Err(EbccCodecError::EncodedDataNotBytes {
98                dtype: encoded.dtype(),
99            });
100        };
101
102        if !matches!(encoded.shape(), [_]) {
103            return Err(EbccCodecError::EncodedDataNotOneDimensional {
104                shape: encoded.shape().to_vec(),
105            });
106        }
107
108        decompress(&AnyCowArray::U8(encoded).as_bytes())
109    }
110
111    fn decode_into(
112        &self,
113        encoded: AnyArrayView,
114        decoded: AnyArrayViewMut,
115    ) -> Result<(), Self::Error> {
116        let AnyArrayView::U8(encoded) = encoded else {
117            return Err(EbccCodecError::EncodedDataNotBytes {
118                dtype: encoded.dtype(),
119            });
120        };
121
122        if !matches!(encoded.shape(), [_]) {
123            return Err(EbccCodecError::EncodedDataNotOneDimensional {
124                shape: encoded.shape().to_vec(),
125            });
126        }
127
128        match decoded {
129            AnyArrayViewMut::F32(decoded) => {
130                decompress_into(&AnyArrayView::U8(encoded).as_bytes(), decoded)
131            }
132            decoded => Err(EbccCodecError::UnsupportedDtype(decoded.dtype())),
133        }
134    }
135}
136
// The codec is its own configuration type, so converting between the codec
// and its configuration is the identity.
impl StaticCodec for EbccCodec {
    /// Identifier of this codec
    const CODEC_ID: &'static str = "ebcc.rs";

    /// The codec itself doubles as its configuration
    type Config<'de> = Self;

    /// Build the codec from its configuration, which already is the codec
    fn from_config(config: Self::Config<'_>) -> Self {
        config
    }

    /// Borrow the codec as its own configuration
    fn get_config(&self) -> StaticCodecConfig<'_, Self> {
        StaticCodecConfig::from(self)
    }
}
150
151/// Errors that may occur when applying the [`EbccCodec`].
152#[derive(Debug, thiserror::Error)]
153pub enum EbccCodecError {
154    /// [`EbccCodec`] does not support the dtype
155    #[error("Ebcc does not support the dtype {0}")]
156    UnsupportedDtype(AnyArrayDType),
157    /// [`EbccCodec`] failed to encode the header
158    #[error("Ebcc failed to encode the header")]
159    HeaderEncodeFailed {
160        /// Opaque source error
161        source: EbccHeaderError,
162    },
163    /// [`EbccCodec`] can only encode >2D data where the last two dimensions
164    /// must be at least 32x32 but received an array with an insufficient shape
165    #[error(
166        "Ebcc can only encode >2D data where the last two dimensions must be at least 32x32 but received an array of shape {shape:?}"
167    )]
168    InsufficientDimensions {
169        /// The unexpected shape of the array
170        shape: Vec<usize>,
171    },
172    /// [`EbccCodec`] failed to encode the data
173    #[error("Ebcc failed to encode the data")]
174    EbccEncodeFailed {
175        /// Opaque source error
176        source: EbccCodingError,
177    },
178    /// [`EbccCodec`] failed to encode a 3D slice
179    #[error("Ebcc failed to encode a 3D slice")]
180    SliceEncodeFailed {
181        /// Opaque source error
182        source: EbccSliceError,
183    },
184    /// [`EbccCodec`] can only decode one-dimensional byte arrays but received
185    /// an array of a different dtype
186    #[error(
187        "Ebcc can only decode one-dimensional byte arrays but received an array of dtype {dtype}"
188    )]
189    EncodedDataNotBytes {
190        /// The unexpected dtype of the encoded array
191        dtype: AnyArrayDType,
192    },
193    /// [`EbccCodec`] can only decode one-dimensional byte arrays but received
194    /// an array of a different shape
195    #[error(
196        "Ebcc can only decode one-dimensional byte arrays but received a byte array of shape {shape:?}"
197    )]
198    EncodedDataNotOneDimensional {
199        /// The unexpected shape of the encoded array
200        shape: Vec<usize>,
201    },
202    /// [`EbccCodec`] failed to decode the header
203    #[error("Ebcc failed to decode the header")]
204    HeaderDecodeFailed {
205        /// Opaque source error
206        source: EbccHeaderError,
207    },
208    /// [`EbccCodec`] cannot decode into an array with a mismatching shape
209    #[error("Ebcc cannot decode an array of shape {decoded:?} into an array of shape {array:?}")]
210    DecodeIntoShapeMismatch {
211        /// The shape of the decoded data
212        decoded: Vec<usize>,
213        /// The mismatching shape of the array to decode into
214        array: Vec<usize>,
215    },
216    /// [`EbccCodec`] failed to decode a 3D slice
217    #[error("Ebcc failed to decode a slice")]
218    SliceDecodeFailed {
219        /// Opaque source error
220        source: EbccSliceError,
221    },
222    /// [`EbccCodec`] failed to decode from an excessive number of slices
223    #[error("Ebcc failed to decode from an excessive number of slices")]
224    DecodeTooManySlices,
225    /// [`EbccCodec`] failed to decode the data
226    #[error("Ebcc failed to decode the data")]
227    EbccDecodeFailed {
228        /// Opaque source error
229        source: EbccCodingError,
230    },
231}
232
#[expect(clippy::derive_partial_eq_without_eq)] // floats are not Eq
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Hash)]
/// Positive floating point number
///
/// The wrapped value is private. The [`Deserialize`] implementation for
/// `Positive<f32>` only accepts values that are strictly greater than zero.
pub struct Positive<T: Float>(T);
237
238impl<T: Float> PartialEq<T> for Positive<T> {
239    fn eq(&self, other: &T) -> bool {
240        self.0 == *other
241    }
242}
243
244impl Serialize for Positive<f32> {
245    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
246        serializer.serialize_f32(self.0)
247    }
248}
249
250impl<'de> Deserialize<'de> for Positive<f32> {
251    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
252        let x = f32::deserialize(deserializer)?;
253
254        if x > 0.0 {
255            Ok(Self(x))
256        } else {
257            Err(serde::de::Error::invalid_value(
258                serde::de::Unexpected::Float(f64::from(x)),
259                &"a positive value",
260            ))
261        }
262    }
263}
264
265impl JsonSchema for Positive<f32> {
266    fn schema_name() -> Cow<'static, str> {
267        Cow::Borrowed("PositiveF32")
268    }
269
270    fn schema_id() -> Cow<'static, str> {
271        Cow::Borrowed(concat!(module_path!(), "::", "Positive<f32>"))
272    }
273
274    fn json_schema(_gen: &mut SchemaGenerator) -> Schema {
275        json_schema!({
276            "type": "number",
277            "exclusiveMinimum": 0.0
278        })
279    }
280}
281
282#[derive(Debug, Error)]
283#[error(transparent)]
284/// Opaque error for when encoding or decoding the header fails
285pub struct EbccHeaderError(postcard::Error);
286
287#[derive(Debug, Error)]
288#[error(transparent)]
289/// Opaque error for when encoding or decoding a 3D slice fails
290pub struct EbccSliceError(postcard::Error);
291
292#[derive(Debug, Error)]
293#[error(transparent)]
294/// Opaque error for when encoding or decoding with EBCC fails
295pub struct EbccCodingError(ebcc::EBCCError);
296
297/// Compress the `data` array using EBCC with the provided `residual` and
298/// `base_cr`.
299///
300/// # Errors
301///
302/// Errors with
303/// - [`EbccCodecError::HeaderEncodeFailed`] if encoding the header failed
304/// - [`EbccCodecError::InsufficientDimensions`] if the `data` has fewer than
305///   two dimensions or the last two dimensions are not at least 32x32
306/// - [`EbccCodecError::EbccEncodeFailed`] if encoding with EBCC failed
307/// - [`EbccCodecError::SliceEncodeFailed`] if encoding a 3D slice failed
308#[allow(clippy::missing_panics_doc)]
309pub fn compress<S: Data<Elem = f32>, D: Dimension>(
310    data: ArrayBase<S, D>,
311    residual: EbccResidualType,
312    base_cr: Positive<f32>,
313) -> Result<Vec<u8>, EbccCodecError> {
314    let mut encoded = postcard::to_extend(
315        &CompressionHeader {
316            dtype: EbccDType::F32,
317            shape: Cow::Borrowed(data.shape()),
318            version: StaticCodecVersion,
319        },
320        Vec::new(),
321    )
322    .map_err(|err| EbccCodecError::HeaderEncodeFailed {
323        source: EbccHeaderError(err),
324    })?;
325
326    // EBCC cannot handle zero-length dimensions
327    if data.is_empty() {
328        return Ok(encoded);
329    }
330
331    let mut chunk_size = Vec::from(data.shape());
332    let (width, height, depth) = match *chunk_size.as_mut_slice() {
333        [ref mut rest @ .., depth, height, width] => {
334            for r in rest {
335                *r = 1;
336            }
337            (width, height, depth)
338        }
339        [height, width] => (width, height, 1),
340        _ => {
341            return Err(EbccCodecError::InsufficientDimensions {
342                shape: Vec::from(data.shape()),
343            });
344        }
345    };
346
347    if (width < 32) || (height < 32) {
348        return Err(EbccCodecError::InsufficientDimensions {
349            shape: Vec::from(data.shape()),
350        });
351    }
352
353    for mut slice in data.into_dyn().exact_chunks(chunk_size.as_slice()) {
354        while slice.ndim() < 3 {
355            slice = slice.insert_axis(Axis(0));
356        }
357        #[expect(clippy::unwrap_used)]
358        // slice must now have at least three axes, and all but the last three
359        //  must be of size 1
360        let slice = slice.into_shape_with_order((depth, height, width)).unwrap();
361
362        let encoded_slice = ebcc::ebcc_encode(
363            slice,
364            &ebcc::EBCCConfig {
365                base_cr: base_cr.0,
366                residual_compression_type: match residual {
367                    EbccResidualType::Jpeg2000Only => ebcc::EBCCResidualType::Jpeg2000Only,
368                    EbccResidualType::AbsoluteError { error } => {
369                        ebcc::EBCCResidualType::AbsoluteError(error.0)
370                    }
371                    EbccResidualType::RelativeError { error } => {
372                        ebcc::EBCCResidualType::RelativeError(error.0)
373                    }
374                },
375            },
376        )
377        .map_err(|err| EbccCodecError::EbccEncodeFailed {
378            source: EbccCodingError(err),
379        })?;
380
381        encoded = postcard::to_extend(encoded_slice.as_slice(), encoded).map_err(|err| {
382            EbccCodecError::SliceEncodeFailed {
383                source: EbccSliceError(err),
384            }
385        })?;
386    }
387
388    Ok(encoded)
389}
390
391/// Decompress the `encoded` data into an array using EBCC.
392///
393/// # Errors
394///
395/// Errors with
396/// - [`EbccCodecError::HeaderDecodeFailed`] if decoding the header failed
397/// - [`EbccCodecError::SliceDecodeFailed`] if decoding a 3D slice failed
398/// - [`EbccCodecError::EbccDecodeFailed`] if decoding with EBCC failed
399/// - [`EbccCodecError::DecodeTooManySlices`] if the encoded data contains
400///   too many slices
401pub fn decompress(encoded: &[u8]) -> Result<AnyArray, EbccCodecError> {
402    fn decompress_typed(
403        encoded: &[u8],
404        shape: &[usize],
405    ) -> Result<Array<f32, IxDyn>, EbccCodecError> {
406        let mut decoded = Array::<f32, _>::zeros(shape);
407        decompress_into_typed(encoded, decoded.view_mut())?;
408        Ok(decoded)
409    }
410
411    let (header, encoded) =
412        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
413            EbccCodecError::HeaderDecodeFailed {
414                source: EbccHeaderError(err),
415            }
416        })?;
417
418    // Return empty data for zero-size arrays
419    if header.shape.iter().copied().any(|s| s == 0) {
420        return match header.dtype {
421            EbccDType::F32 => Ok(AnyArray::F32(Array::zeros(&*header.shape))),
422        };
423    }
424
425    match header.dtype {
426        EbccDType::F32 => Ok(AnyArray::F32(decompress_typed(encoded, &header.shape)?)),
427    }
428}
429
430/// Decompress the `encoded` data into the `decoded` array using EBCC.
431///
432/// # Errors
433///
434/// Errors with
435/// - [`EbccCodecError::HeaderDecodeFailed`] if decoding the header failed
436/// - [`EbccCodecError::DecodeIntoShapeMismatch`] is the `decoded` array shape
437///   does not match the shape of the decoded data
438/// - [`EbccCodecError::SliceDecodeFailed`] if decoding a 3D slice failed
439/// - [`EbccCodecError::EbccDecodeFailed`] if decoding with EBCC failed
440/// - [`EbccCodecError::DecodeTooManySlices`] if the encoded data contains
441///   too many slices
442pub fn decompress_into<S: DataMut<Elem = f32>, D: Dimension>(
443    encoded: &[u8],
444    decoded: ArrayBase<S, D>,
445) -> Result<(), EbccCodecError> {
446    let (header, encoded) =
447        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
448            EbccCodecError::HeaderDecodeFailed {
449                source: EbccHeaderError(err),
450            }
451        })?;
452
453    if decoded.shape() != &*header.shape {
454        return Err(EbccCodecError::DecodeIntoShapeMismatch {
455            decoded: header.shape.into_owned(),
456            array: Vec::from(decoded.shape()),
457        });
458    }
459
460    // Return empty data for zero-size arrays
461    if header.shape.iter().copied().any(|s| s == 0) {
462        return match header.dtype {
463            EbccDType::F32 => Ok(()),
464        };
465    }
466
467    match header.dtype {
468        EbccDType::F32 => decompress_into_typed(encoded, decoded.into_dyn().view_mut()),
469    }
470}
471
472fn decompress_into_typed(
473    mut encoded: &[u8],
474    mut decoded: ArrayViewMut<f32, IxDyn>,
475) -> Result<(), EbccCodecError> {
476    let mut chunk_size = Vec::from(decoded.shape());
477    let (width, height, depth) = match *chunk_size.as_mut_slice() {
478        [ref mut rest @ .., depth, height, width] => {
479            for r in rest {
480                *r = 1;
481            }
482            (width, height, depth)
483        }
484        [height, width] => (width, height, 1),
485        [width] => (width, 1, 1),
486        [] => (1, 1, 1),
487    };
488
489    for mut slice in decoded.exact_chunks_mut(chunk_size.as_slice()) {
490        let (encoded_slice, rest) =
491            postcard::take_from_bytes::<Cow<[u8]>>(encoded).map_err(|err| {
492                EbccCodecError::SliceDecodeFailed {
493                    source: EbccSliceError(err),
494                }
495            })?;
496        encoded = rest;
497
498        while slice.ndim() < 3 {
499            slice = slice.insert_axis(Axis(0));
500        }
501        #[expect(clippy::unwrap_used)]
502        // slice must now have at least three axes, and all but the last
503        //  three must be of size 1
504        let slice = slice.into_shape_with_order((depth, height, width)).unwrap();
505
506        ebcc::ebcc_decode_into(&encoded_slice, slice).map_err(|err| {
507            EbccCodecError::EbccDecodeFailed {
508                source: EbccCodingError(err),
509            }
510        })?;
511    }
512
513    if !encoded.is_empty() {
514        return Err(EbccCodecError::DecodeTooManySlices);
515    }
516
517    Ok(())
518}
519
/// Header that is serialized with postcard at the very start of the encoded
/// data, before the encoded slices.
// NOTE(review): the field order presumably determines the encoded layout, so
//  fields should not be reordered without bumping the version - confirm
#[derive(Serialize, Deserialize)]
struct CompressionHeader<'a> {
    /// Dtype of the array that was encoded
    dtype: EbccDType,
    /// Shape of the array that was encoded, borrowed from the array's shape
    /// when the header is written
    #[serde(borrow)]
    shape: Cow<'a, [usize]>,
    /// Encoding format version of the codec
    version: EbccCodecVersion,
}
527
/// Dtypes that EBCC can compress and decompress
///
/// Stored in the [`CompressionHeader`] of the encoded data.
#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
enum EbccDType {
    /// 32-bit floating point numbers, named `f32` with the alias `float32`
    #[serde(rename = "f32", alias = "float32")]
    F32,
}
534
#[cfg(test)]
mod tests {
    use super::*;

    /// Codec without residual compression, shared by the dtype and shape
    /// tests.
    fn jpeg2000_only_codec() -> EbccCodec {
        EbccCodec {
            residual: EbccResidualType::Jpeg2000Only,
            base_cr: Positive(10.0),
            version: StaticCodecVersion,
        }
    }

    /// Encodes an all-zero `f32` array of the given `shape`.
    fn encode_zeros(codec: &EbccCodec, shape: &[usize]) -> Result<AnyArray, EbccCodecError> {
        let zeros = Array::<f32, IxDyn>::zeros(shape);

        codec.encode(AnyCowArray::F32(zeros.into()))
    }

    #[test]
    fn test_unsupported_dtype() {
        let codec = jpeg2000_only_codec();

        let integers = Array1::<i32>::zeros(100).into_dyn();
        let result = codec.encode(AnyCowArray::I32(integers.into()));

        assert!(matches!(result, Err(EbccCodecError::UnsupportedDtype(_))));
    }

    #[test]
    fn test_invalid_dimensions() {
        let codec = jpeg2000_only_codec();

        // Shapes that do not meet the 32x32 requirement: a 1D array, a 2D
        //  array that is too small, and mixed valid/invalid dimensions
        let rejected: [&[usize]; 3] = [&[32], &[16, 16], &[1, 32, 16]];
        for expected in rejected {
            let result = encode_zeros(&codec, expected);
            assert!(
                matches!(result, Err(EbccCodecError::InsufficientDimensions { shape }) if shape == expected)
            );
        }

        // Valid dimensions, without and with slicing
        let accepted: [&[usize]; 2] = [&[1, 32, 32], &[2, 2, 2, 32, 32]];
        for shape in accepted {
            assert!(encode_zeros(&codec, shape).is_ok());
        }
    }

    #[test]
    fn test_large_array() -> Result<(), EbccCodecError> {
        // Test with a larger array (similar to small climate dataset)
        let height = 721; // Quarter degree resolution
        let width = 1440;
        let frames = 1;

        #[expect(clippy::suboptimal_flops, clippy::cast_precision_loss)]
        let data = Array::from_shape_fn((frames, height, width), |(_k, i, j)| {
            let lat = -90.0 + (i as f32 / height as f32) * 180.0;
            let lon = -180.0 + (j as f32 / width as f32) * 360.0;
            #[allow(clippy::let_and_return)]
            let temp = 273.15 + 30.0 * (1.0 - lat.abs() / 90.0) + 5.0 * (lon / 180.0).sin();
            temp
        });

        let codec_error = 0.1;
        let codec = EbccCodec {
            residual: EbccResidualType::AbsoluteError {
                error: Positive(codec_error),
            },
            base_cr: Positive(20.0),
            version: StaticCodecVersion,
        };

        // Round-trip the data through the codec
        let encoded = codec.encode(AnyArray::F32(data.clone().into_dyn()).into_cow())?;
        let decoded = codec.decode(encoded.cow())?;

        let encoded = match encoded {
            AnyArray::U8(bytes) => bytes,
            other => {
                return Err(EbccCodecError::EncodedDataNotBytes {
                    dtype: other.dtype(),
                });
            }
        };

        let decoded = match decoded {
            AnyArray::F32(values) => values,
            other => return Err(EbccCodecError::UnsupportedDtype(other.dtype())),
        };

        // Check compression ratio
        let original_size = data.len() * std::mem::size_of::<f32>();
        #[allow(clippy::cast_precision_loss)]
        let compression_ratio = original_size as f64 / encoded.len() as f64;

        assert!(
            compression_ratio > 5.0,
            "Compression ratio {compression_ratio} should be at least 5:1",
        );

        // Check error bound is respected
        let max_error = data
            .iter()
            .zip(decoded.iter())
            .fold(0.0f32, |worst, (&orig, &decomp)| {
                worst.max((orig - decomp).abs())
            });

        assert!(
            max_error <= (codec_error + 1e-6),
            "Max error {max_error} exceeds error bound {codec_error}",
        );

        Ok(())
    }
}