Skip to main content

numcodecs_ebcc/
lib.rs

1//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs]
2//!
3//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main
4//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain
5//!
6//! [MSRV]: https://img.shields.io/badge/MSRV-1.87.0-blue
7//! [repo]: https://github.com/juntyr/numcodecs-rs
8//!
9//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-ebcc
10//! [crates.io]: https://crates.io/crates/numcodecs-ebcc
11//!
12//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-ebcc
13//! [docs.rs]: https://docs.rs/numcodecs-ebcc/
14//!
15//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue
16//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_ebcc
17//!
18//! EBCC codec implementation for the [`numcodecs`] API.
19
20#![allow(clippy::multiple_crate_versions)] // embedded-io
21
22#[cfg(test)]
23use ::serde_json as _;
24
25use std::borrow::Cow;
26
27use ndarray::{Array, Array1, ArrayBase, ArrayViewMut, Axis, Data, DataMut, Dimension, IxDyn};
28use num_traits::Float;
29use numcodecs::{
30    AnyArray, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec,
31    StaticCodecConfig, StaticCodecVersion,
32};
33use schemars::{JsonSchema, Schema, SchemaGenerator, json_schema};
34use serde::{Deserialize, Deserializer, Serialize, Serializer};
35use thiserror::Error;
36
37type EbccCodecVersion = StaticCodecVersion<0, 1, 0>;
38
39/// EBCC codec implementation for the [`numcodecs`] API.
40///
41/// EBCC combines JPEG2000 compression with error-bounded residual compression.
42#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
43#[schemars(deny_unknown_fields)]
44pub struct EbccCodec {
45    /// EBCC residual compression
46    #[serde(flatten)]
47    pub residual: EbccResidualType,
48    /// JPEG2000 positive base compression ratio
49    pub base_cr: Positive<f32>,
50    /// The codec's encoding format version. Do not provide this parameter explicitly.
51    #[serde(default, rename = "_version")]
52    pub version: EbccCodecVersion,
53}
54
55/// Residual compression types supported by EBCC.
56#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, JsonSchema)]
57#[serde(tag = "residual")]
58#[serde(deny_unknown_fields)]
59pub enum EbccResidualType {
60    #[serde(rename = "jpeg2000-only")]
61    /// No residual compression - base JPEG2000 only
62    Jpeg2000Only,
63    #[serde(rename = "absolute")]
64    /// Residual compression with absolute maximum error bound
65    AbsoluteError {
66        /// The positive maximum absolute error bound
67        error: Positive<f32>,
68    },
69    #[serde(rename = "relative")]
70    /// Residual compression with relative error bound
71    RelativeError {
72        /// The positive maximum relative error bound
73        error: Positive<f32>,
74    },
75}
76
77impl Codec for EbccCodec {
78    type Error = EbccCodecError;
79
80    fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
81        match data {
82            AnyCowArray::F32(data) => Ok(AnyArray::U8(
83                Array1::from(compress(data, self.residual, self.base_cr)?).into_dyn(),
84            )),
85            encoded => Err(EbccCodecError::UnsupportedDtype(encoded.dtype())),
86        }
87    }
88
89    fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
90        let AnyCowArray::U8(encoded) = encoded else {
91            return Err(EbccCodecError::EncodedDataNotBytes {
92                dtype: encoded.dtype(),
93            });
94        };
95
96        if !matches!(encoded.shape(), [_]) {
97            return Err(EbccCodecError::EncodedDataNotOneDimensional {
98                shape: encoded.shape().to_vec(),
99            });
100        }
101
102        decompress(&AnyCowArray::U8(encoded).as_bytes())
103    }
104
105    fn decode_into(
106        &self,
107        encoded: AnyArrayView,
108        decoded: AnyArrayViewMut,
109    ) -> Result<(), Self::Error> {
110        let AnyArrayView::U8(encoded) = encoded else {
111            return Err(EbccCodecError::EncodedDataNotBytes {
112                dtype: encoded.dtype(),
113            });
114        };
115
116        if !matches!(encoded.shape(), [_]) {
117            return Err(EbccCodecError::EncodedDataNotOneDimensional {
118                shape: encoded.shape().to_vec(),
119            });
120        }
121
122        match decoded {
123            AnyArrayViewMut::F32(decoded) => {
124                decompress_into(&AnyArrayView::U8(encoded).as_bytes(), decoded)
125            }
126            decoded => Err(EbccCodecError::UnsupportedDtype(decoded.dtype())),
127        }
128    }
129}
130
131impl StaticCodec for EbccCodec {
132    const CODEC_ID: &'static str = "ebcc.rs";
133
134    type Config<'de> = Self;
135
136    fn from_config(config: Self::Config<'_>) -> Self {
137        config
138    }
139
140    fn get_config(&self) -> StaticCodecConfig<'_, Self> {
141        StaticCodecConfig::from(self)
142    }
143}
144
145/// Errors that may occur when applying the [`EbccCodec`].
146#[derive(Debug, thiserror::Error)]
147pub enum EbccCodecError {
148    /// [`EbccCodec`] does not support the dtype
149    #[error("Ebcc does not support the dtype {0}")]
150    UnsupportedDtype(AnyArrayDType),
151    /// [`EbccCodec`] failed to encode the header
152    #[error("Ebcc failed to encode the header")]
153    HeaderEncodeFailed {
154        /// Opaque source error
155        source: EbccHeaderError,
156    },
157    /// [`EbccCodec`] can only encode >2D data where the last two dimensions
158    /// must be at least 32x32 but received an array with an insufficient shape
159    #[error(
160        "Ebcc can only encode >2D data where the last two dimensions must be at least 32x32 but received an array of shape {shape:?}"
161    )]
162    InsufficientDimensions {
163        /// The unexpected shape of the array
164        shape: Vec<usize>,
165    },
166    /// [`EbccCodec`] failed to encode the data
167    #[error("Ebcc failed to encode the data")]
168    EbccEncodeFailed {
169        /// Opaque source error
170        source: EbccCodingError,
171    },
172    /// [`EbccCodec`] failed to encode a 3D slice
173    #[error("Ebcc failed to encode a 3D slice")]
174    SliceEncodeFailed {
175        /// Opaque source error
176        source: EbccSliceError,
177    },
178    /// [`EbccCodec`] can only decode one-dimensional byte arrays but received
179    /// an array of a different dtype
180    #[error(
181        "Ebcc can only decode one-dimensional byte arrays but received an array of dtype {dtype}"
182    )]
183    EncodedDataNotBytes {
184        /// The unexpected dtype of the encoded array
185        dtype: AnyArrayDType,
186    },
187    /// [`EbccCodec`] can only decode one-dimensional byte arrays but received
188    /// an array of a different shape
189    #[error(
190        "Ebcc can only decode one-dimensional byte arrays but received a byte array of shape {shape:?}"
191    )]
192    EncodedDataNotOneDimensional {
193        /// The unexpected shape of the encoded array
194        shape: Vec<usize>,
195    },
196    /// [`EbccCodec`] failed to decode the header
197    #[error("Ebcc failed to decode the header")]
198    HeaderDecodeFailed {
199        /// Opaque source error
200        source: EbccHeaderError,
201    },
202    /// [`EbccCodec`] cannot decode into an array with a mismatching shape
203    #[error("Ebcc cannot decode an array of shape {decoded:?} into an array of shape {array:?}")]
204    DecodeIntoShapeMismatch {
205        /// The shape of the decoded data
206        decoded: Vec<usize>,
207        /// The mismatching shape of the array to decode into
208        array: Vec<usize>,
209    },
210    /// [`EbccCodec`] failed to decode a 3D slice
211    #[error("Ebcc failed to decode a slice")]
212    SliceDecodeFailed {
213        /// Opaque source error
214        source: EbccSliceError,
215    },
216    /// [`EbccCodec`] failed to decode from an excessive number of slices
217    #[error("Ebcc failed to decode from an excessive number of slices")]
218    DecodeTooManySlices,
219    /// [`EbccCodec`] failed to decode the data
220    #[error("Ebcc failed to decode the data")]
221    EbccDecodeFailed {
222        /// Opaque source error
223        source: EbccCodingError,
224    },
225}
226
227#[expect(clippy::derive_partial_eq_without_eq)] // floats are not Eq
228#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Hash)]
229/// Positive floating point number
230pub struct Positive<T: Float>(T);
231
232impl<T: Float> PartialEq<T> for Positive<T> {
233    fn eq(&self, other: &T) -> bool {
234        self.0 == *other
235    }
236}
237
238impl Serialize for Positive<f32> {
239    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
240        serializer.serialize_f32(self.0)
241    }
242}
243
244impl<'de> Deserialize<'de> for Positive<f32> {
245    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
246        let x = f32::deserialize(deserializer)?;
247
248        if x > 0.0 {
249            Ok(Self(x))
250        } else {
251            Err(serde::de::Error::invalid_value(
252                serde::de::Unexpected::Float(f64::from(x)),
253                &"a positive value",
254            ))
255        }
256    }
257}
258
259impl JsonSchema for Positive<f32> {
260    fn schema_name() -> Cow<'static, str> {
261        Cow::Borrowed("PositiveF32")
262    }
263
264    fn schema_id() -> Cow<'static, str> {
265        Cow::Borrowed(concat!(module_path!(), "::", "Positive<f32>"))
266    }
267
268    fn json_schema(_gen: &mut SchemaGenerator) -> Schema {
269        json_schema!({
270            "type": "number",
271            "exclusiveMinimum": 0.0
272        })
273    }
274}
275
276#[derive(Debug, Error)]
277#[error(transparent)]
278/// Opaque error for when encoding or decoding the header fails
279pub struct EbccHeaderError(postcard::Error);
280
281#[derive(Debug, Error)]
282#[error(transparent)]
283/// Opaque error for when encoding or decoding a 3D slice fails
284pub struct EbccSliceError(postcard::Error);
285
286#[derive(Debug, Error)]
287#[error(transparent)]
288/// Opaque error for when encoding or decoding with EBCC fails
289pub struct EbccCodingError(ebcc::EBCCError);
290
291/// Compress the `data` array using EBCC with the provided `residual` and
292/// `base_cr`.
293///
294/// # Errors
295///
296/// Errors with
297/// - [`EbccCodecError::HeaderEncodeFailed`] if encoding the header failed
298/// - [`EbccCodecError::InsufficientDimensions`] if the `data` has fewer than
299///   two dimensions or the last two dimensions are not at least 32x32
300/// - [`EbccCodecError::EbccEncodeFailed`] if encoding with EBCC failed
301/// - [`EbccCodecError::SliceEncodeFailed`] if encoding a 3D slice failed
302#[allow(clippy::missing_panics_doc)]
303pub fn compress<S: Data<Elem = f32>, D: Dimension>(
304    data: ArrayBase<S, D>,
305    residual: EbccResidualType,
306    base_cr: Positive<f32>,
307) -> Result<Vec<u8>, EbccCodecError> {
308    let mut encoded = postcard::to_extend(
309        &CompressionHeader {
310            dtype: EbccDType::F32,
311            shape: Cow::Borrowed(data.shape()),
312            version: StaticCodecVersion,
313        },
314        Vec::new(),
315    )
316    .map_err(|err| EbccCodecError::HeaderEncodeFailed {
317        source: EbccHeaderError(err),
318    })?;
319
320    // EBCC cannot handle zero-length dimensions
321    if data.is_empty() {
322        return Ok(encoded);
323    }
324
325    let mut chunk_size = Vec::from(data.shape());
326    let (width, height, depth) = match *chunk_size.as_mut_slice() {
327        [ref mut rest @ .., depth, height, width] => {
328            for r in rest {
329                *r = 1;
330            }
331            (width, height, depth)
332        }
333        [height, width] => (width, height, 1),
334        _ => {
335            return Err(EbccCodecError::InsufficientDimensions {
336                shape: Vec::from(data.shape()),
337            });
338        }
339    };
340
341    if (width < 32) || (height < 32) {
342        return Err(EbccCodecError::InsufficientDimensions {
343            shape: Vec::from(data.shape()),
344        });
345    }
346
347    for mut slice in data.into_dyn().exact_chunks(chunk_size.as_slice()) {
348        while slice.ndim() < 3 {
349            slice = slice.insert_axis(Axis(0));
350        }
351        #[expect(clippy::unwrap_used)]
352        // slice must now have at least three axes, and all but the last three
353        //  must be of size 1
354        let slice = slice.into_shape_with_order((depth, height, width)).unwrap();
355
356        let encoded_slice = ebcc::ebcc_encode(
357            slice,
358            &ebcc::EBCCConfig {
359                base_cr: base_cr.0,
360                residual_compression_type: match residual {
361                    EbccResidualType::Jpeg2000Only => ebcc::EBCCResidualType::Jpeg2000Only,
362                    EbccResidualType::AbsoluteError { error } => {
363                        ebcc::EBCCResidualType::AbsoluteError(error.0)
364                    }
365                    EbccResidualType::RelativeError { error } => {
366                        ebcc::EBCCResidualType::RelativeError(error.0)
367                    }
368                },
369            },
370        )
371        .map_err(|err| EbccCodecError::EbccEncodeFailed {
372            source: EbccCodingError(err),
373        })?;
374
375        encoded = postcard::to_extend(encoded_slice.as_slice(), encoded).map_err(|err| {
376            EbccCodecError::SliceEncodeFailed {
377                source: EbccSliceError(err),
378            }
379        })?;
380    }
381
382    Ok(encoded)
383}
384
385/// Decompress the `encoded` data into an array using EBCC.
386///
387/// # Errors
388///
389/// Errors with
390/// - [`EbccCodecError::HeaderDecodeFailed`] if decoding the header failed
391/// - [`EbccCodecError::SliceDecodeFailed`] if decoding a 3D slice failed
392/// - [`EbccCodecError::EbccDecodeFailed`] if decoding with EBCC failed
393/// - [`EbccCodecError::DecodeTooManySlices`] if the encoded data contains
394///   too many slices
395pub fn decompress(encoded: &[u8]) -> Result<AnyArray, EbccCodecError> {
396    fn decompress_typed(
397        encoded: &[u8],
398        shape: &[usize],
399    ) -> Result<Array<f32, IxDyn>, EbccCodecError> {
400        let mut decoded = Array::<f32, _>::zeros(shape);
401        decompress_into_typed(encoded, decoded.view_mut())?;
402        Ok(decoded)
403    }
404
405    let (header, encoded) =
406        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
407            EbccCodecError::HeaderDecodeFailed {
408                source: EbccHeaderError(err),
409            }
410        })?;
411
412    // Return empty data for zero-size arrays
413    if header.shape.iter().copied().any(|s| s == 0) {
414        return match header.dtype {
415            EbccDType::F32 => Ok(AnyArray::F32(Array::zeros(&*header.shape))),
416        };
417    }
418
419    match header.dtype {
420        EbccDType::F32 => Ok(AnyArray::F32(decompress_typed(encoded, &header.shape)?)),
421    }
422}
423
424/// Decompress the `encoded` data into the `decoded` array using EBCC.
425///
426/// # Errors
427///
428/// Errors with
429/// - [`EbccCodecError::HeaderDecodeFailed`] if decoding the header failed
430/// - [`EbccCodecError::DecodeIntoShapeMismatch`] is the `decoded` array shape
431///   does not match the shape of the decoded data
432/// - [`EbccCodecError::SliceDecodeFailed`] if decoding a 3D slice failed
433/// - [`EbccCodecError::EbccDecodeFailed`] if decoding with EBCC failed
434/// - [`EbccCodecError::DecodeTooManySlices`] if the encoded data contains
435///   too many slices
436pub fn decompress_into<S: DataMut<Elem = f32>, D: Dimension>(
437    encoded: &[u8],
438    decoded: ArrayBase<S, D>,
439) -> Result<(), EbccCodecError> {
440    let (header, encoded) =
441        postcard::take_from_bytes::<CompressionHeader>(encoded).map_err(|err| {
442            EbccCodecError::HeaderDecodeFailed {
443                source: EbccHeaderError(err),
444            }
445        })?;
446
447    if decoded.shape() != &*header.shape {
448        return Err(EbccCodecError::DecodeIntoShapeMismatch {
449            decoded: header.shape.into_owned(),
450            array: Vec::from(decoded.shape()),
451        });
452    }
453
454    // Return empty data for zero-size arrays
455    if header.shape.iter().copied().any(|s| s == 0) {
456        return match header.dtype {
457            EbccDType::F32 => Ok(()),
458        };
459    }
460
461    match header.dtype {
462        EbccDType::F32 => decompress_into_typed(encoded, decoded.into_dyn().view_mut()),
463    }
464}
465
466fn decompress_into_typed(
467    mut encoded: &[u8],
468    mut decoded: ArrayViewMut<f32, IxDyn>,
469) -> Result<(), EbccCodecError> {
470    let mut chunk_size = Vec::from(decoded.shape());
471    let (width, height, depth) = match *chunk_size.as_mut_slice() {
472        [ref mut rest @ .., depth, height, width] => {
473            for r in rest {
474                *r = 1;
475            }
476            (width, height, depth)
477        }
478        [height, width] => (width, height, 1),
479        [width] => (width, 1, 1),
480        [] => (1, 1, 1),
481    };
482
483    for mut slice in decoded.exact_chunks_mut(chunk_size.as_slice()) {
484        let (encoded_slice, rest) =
485            postcard::take_from_bytes::<Cow<[u8]>>(encoded).map_err(|err| {
486                EbccCodecError::SliceDecodeFailed {
487                    source: EbccSliceError(err),
488                }
489            })?;
490        encoded = rest;
491
492        while slice.ndim() < 3 {
493            slice = slice.insert_axis(Axis(0));
494        }
495        #[expect(clippy::unwrap_used)]
496        // slice must now have at least three axes, and all but the last
497        //  three must be of size 1
498        let slice = slice.into_shape_with_order((depth, height, width)).unwrap();
499
500        ebcc::ebcc_decode_into(&encoded_slice, slice).map_err(|err| {
501            EbccCodecError::EbccDecodeFailed {
502                source: EbccCodingError(err),
503            }
504        })?;
505    }
506
507    if !encoded.is_empty() {
508        return Err(EbccCodecError::DecodeTooManySlices);
509    }
510
511    Ok(())
512}
513
514#[derive(Serialize, Deserialize)]
515struct CompressionHeader<'a> {
516    dtype: EbccDType,
517    #[serde(borrow)]
518    shape: Cow<'a, [usize]>,
519    version: EbccCodecVersion,
520}
521
522/// Dtypes that EBCC can compress and decompress
523#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
524enum EbccDType {
525    #[serde(rename = "f32", alias = "float32")]
526    F32,
527}
528
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a codec with the given residual mode and base compression ratio.
    fn codec_with(residual: EbccResidualType, base_cr: f32) -> EbccCodec {
        EbccCodec {
            residual,
            base_cr: Positive(base_cr),
            version: StaticCodecVersion,
        }
    }

    /// Encodes an all-zero `f32` array of the given shape.
    fn encode_zeros(codec: &EbccCodec, shape: &[usize]) -> Result<AnyArray, EbccCodecError> {
        let zeros = Array::<f32, _>::zeros(shape);
        codec.encode(AnyCowArray::F32(zeros.into()))
    }

    #[test]
    fn test_unsupported_dtype() {
        let codec = codec_with(EbccResidualType::Jpeg2000Only, 10.0);

        let zeros = Array1::<i32>::zeros(100).into_dyn();
        let result = codec.encode(AnyCowArray::I32(zeros.into()));

        assert!(matches!(result, Err(EbccCodecError::UnsupportedDtype(_))));
    }

    #[test]
    fn test_invalid_dimensions() {
        let codec = codec_with(EbccResidualType::Jpeg2000Only, 10.0);

        // Shapes that must be rejected, in order:
        // - 1D data (32 < 32x32 requirement)
        // - too small 2D data (16x16 < 32x32 requirement)
        // - mixed valid/invalid dimensions
        let rejected: [&[usize]; 3] = [&[32], &[16, 16], &[1, 32, 16]];
        for shape in rejected {
            let result = encode_zeros(&codec, shape);
            assert!(matches!(
                result,
                Err(EbccCodecError::InsufficientDimensions { shape: reported }) if reported == shape
            ));
        }

        // Shapes that must be accepted, in order:
        // - valid dimensions
        // - valid dimensions with slicing
        let accepted: [&[usize]; 2] = [&[1, 32, 32], &[2, 2, 2, 32, 32]];
        for shape in accepted {
            assert!(encode_zeros(&codec, shape).is_ok());
        }
    }

    #[test]
    fn test_large_array() -> Result<(), EbccCodecError> {
        // Larger array (similar to small climate dataset): a single frame at
        // quarter degree resolution
        let (frames, height, width) = (1, 721, 1440);

        #[expect(clippy::suboptimal_flops, clippy::cast_precision_loss)]
        let data = Array::from_shape_fn((frames, height, width), |(_frame, row, col)| {
            let lat = -90.0 + (row as f32 / height as f32) * 180.0;
            let lon = -180.0 + (col as f32 / width as f32) * 360.0;
            273.15 + 30.0 * (1.0 - lat.abs() / 90.0) + 5.0 * (lon / 180.0).sin()
        });

        let codec_error = 0.1;
        let codec = codec_with(
            EbccResidualType::AbsoluteError {
                error: Positive(codec_error),
            },
            20.0,
        );

        let encoded = codec.encode(AnyArray::F32(data.clone().into_dyn()).into_cow())?;
        let decoded = codec.decode(encoded.cow())?;

        let encoded = match encoded {
            AnyArray::U8(encoded) => encoded,
            other => {
                return Err(EbccCodecError::EncodedDataNotBytes {
                    dtype: other.dtype(),
                });
            }
        };

        let decoded = match decoded {
            AnyArray::F32(decoded) => decoded,
            other => return Err(EbccCodecError::UnsupportedDtype(other.dtype())),
        };

        // Check compression ratio
        let original_size = data.len() * std::mem::size_of::<f32>();
        #[allow(clippy::cast_precision_loss)]
        let compression_ratio = original_size as f64 / encoded.len() as f64;

        assert!(
            compression_ratio > 5.0,
            "Compression ratio {compression_ratio} should be at least 5:1",
        );

        // Check error bound is respected
        let mut max_error = 0.0f32;
        for (&orig, &decomp) in data.iter().zip(decoded.iter()) {
            max_error = max_error.max((orig - decomp).abs());
        }

        assert!(
            max_error <= (codec_error + 1e-6),
            "Max error {max_error} exceeds error bound {codec_error}",
        );

        Ok(())
    }
}