Skip to main content

zarrs/array/codec/array_to_bytes/
pcodec.rs

1//! The `pcodec` array to bytes codec (Experimental).
2//!
3//! <div class="warning">
4//! This codec is experimental and may be incompatible with other Zarr V3 implementations.
5//! </div>
6//!
7//! [Pcodec](https://github.com/mwlon/pcodec) (or Pco, pronounced "pico") losslessly compresses and decompresses numerical sequences with high compression ratio and fast speed.
8//!
9//! This codec requires the `pcodec` feature, which is disabled by default.
10//!
11//! ### Compatible Implementations:
12//! This codec is fully compatible with the `numcodecs.pcodec` codec in `zarr-python`.
13//!
14//! ### Specification
15//! - <https://github.com/zarr-developers/zarr-extensions/tree/numcodecs/codecs/numcodecs.pcodec>
16//!
17//! ### Codec `name` Aliases (Zarr V3)
18//! - `numcodecs.pcodec`
19//! - `https://codec.zarrs.dev/array_to_bytes/pcodec`
20//!
21//! ### Codec `id` Aliases (Zarr V2)
22//! - `pcodec`
23//!
24//! ### Codec `configuration` Example - [`PcodecCodecConfiguration`]:
25//! ```rust
26//! # let JSON = r#"
27//! {
28//!     "level": 5,
29//!     "mode_spec": "auto",
30//!     "delta_spec": "auto",
31//!     "paging_spec": "equal_pages_up_to",
32//!     "delta_encoding_order": null,
33//!     "equal_pages_up_to": 262144
34//! }
35//! # "#;
36//! # use zarrs::metadata_ext::codec::pcodec::PcodecCodecConfiguration;
37//! # serde_json::from_str::<PcodecCodecConfiguration>(JSON).unwrap();
38//! ```
39
40mod pcodec_codec;
41
42use std::sync::Arc;
43
44pub use pcodec_codec::PcodecCodec;
45use zarrs_metadata::v2::MetadataV2;
46use zarrs_metadata::v3::MetadataV3;
47
48use zarrs_codec::{Codec, CodecPluginV2, CodecPluginV3, CodecTraitsV2, CodecTraitsV3};
49pub use zarrs_metadata_ext::codec::pcodec::{
50    PcodecCodecConfiguration, PcodecCodecConfigurationV1, PcodecCompressionLevel,
51    PcodecDeltaEncodingOrder,
52};
53use zarrs_plugin::PluginCreateError;
54
55zarrs_plugin::impl_extension_aliases!(PcodecCodec,
56    v3: "numcodecs.pcodec", ["https://codec.zarrs.dev/array_to_bytes/pcodec"],
57    v2: "pcodec"
58);
59
60// Register the V3 codec.
61inventory::submit! {
62    CodecPluginV3::new::<PcodecCodec>()
63}
64// Register the V2 codec.
65inventory::submit! {
66    CodecPluginV2::new::<PcodecCodec>()
67}
68
69impl CodecTraitsV3 for PcodecCodec {
70    fn create(metadata: &MetadataV3) -> Result<Codec, PluginCreateError> {
71        let configuration = metadata.to_typed_configuration()?;
72        let codec = Arc::new(PcodecCodec::new_with_configuration(&configuration)?);
73        Ok(Codec::ArrayToBytes(codec))
74    }
75}
76
77impl CodecTraitsV2 for PcodecCodec {
78    fn create(metadata: &MetadataV2) -> Result<Codec, PluginCreateError> {
79        let configuration: PcodecCodecConfiguration = metadata.to_typed_configuration()?;
80        let codec = Arc::new(PcodecCodec::new_with_configuration(&configuration)?);
81        Ok(Codec::ArrayToBytes(codec))
82    }
83}
84
// Re-export the pcodec data type items and the `impl_pcodec_data_type_traits` macro from `zarrs_data_type`.
86pub use zarrs_data_type::codec_traits::pcodec::{
87    PcodecDataTypeExt, PcodecDataTypePlugin, PcodecDataTypeTraits, PcodecElementType,
88    impl_pcodec_data_type_traits,
89};
90
#[cfg(test)]
mod tests {
    use std::num::NonZeroU64;
    use std::sync::Arc;

    use super::*;
    use crate::array::{
        ArrayBytes, ArraySubset, ChunkShape, ChunkShapeTraits, DataType, FillValue, data_type,
        transmute_to_bytes_vec,
    };
    use zarrs_codec::{ArrayToBytesCodecTraits, BytesPartialDecoderTraits, CodecOptions};

    /// A codec configuration that every test in this module expects to be accepted.
    const JSON_VALID: &str = r#"{
        "level": 8,
        "delta_encoding_order": 2,
        "mode_spec": "auto",
        "equal_pages_up_to": 262144
    }"#;

    /// Build the codec described by [`JSON_VALID`].
    ///
    /// Panics if the JSON does not deserialize or the codec rejects the configuration, since
    /// either would invalidate every test that relies on this fixture.
    fn valid_codec() -> PcodecCodec {
        let configuration: PcodecCodecConfiguration = serde_json::from_str(JSON_VALID)
            .expect("JSON_VALID should deserialize into a pcodec configuration");
        PcodecCodec::new_with_configuration(&configuration)
            .expect("JSON_VALID should describe a constructible pcodec codec")
    }

    /// The valid configuration must deserialize and be accepted by the codec constructor.
    #[test]
    fn codec_pcodec_configuration() {
        let codec_configuration: PcodecCodecConfiguration =
            serde_json::from_str(JSON_VALID).unwrap();
        // Assert on the result rather than discarding it, so a rejected configuration fails.
        assert!(PcodecCodec::new_with_configuration(&codec_configuration).is_ok());
    }

    /// Encode then decode a 10x10 chunk of `data_type` and check the bytes survive unchanged.
    ///
    /// Errors from computing the encoded representation or from encoding are returned to the
    /// caller, which lets a test assert that a data type is rejected. The encoded length is
    /// also checked against the size reported by `encoded_representation`.
    fn codec_pcodec_round_trip_impl(
        codec: &PcodecCodec,
        data_type: DataType,
        fill_value: impl Into<FillValue>,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let chunk_shape = vec![NonZeroU64::new(10).unwrap(); 2];
        let fill_value = fill_value.into();
        let options = CodecOptions::default();

        // Fill the chunk with a repeating 0..=255 byte ramp; the `as u8` truncation is intended.
        let size = chunk_shape.num_elements_usize() * data_type.fixed_size().unwrap();
        let bytes: Vec<u8> = (0..size).map(|s| s as u8).collect();
        let bytes: ArrayBytes = bytes.into();

        let max_encoded_size =
            codec.encoded_representation(chunk_shape.as_slice(), &data_type, &fill_value)?;
        let encoded = codec.encode(
            bytes.clone(),
            chunk_shape.as_slice(),
            &data_type,
            &fill_value,
            &options,
        )?;
        assert!((encoded.len() as u64) <= max_encoded_size.size().unwrap());

        // Deliberately panic instead of propagating: a decode failure after a successful encode
        // should fail loudly, even in tests that only assert the helper returns an error.
        let decoded = codec
            .decode(
                encoded,
                chunk_shape.as_slice(),
                &data_type,
                &fill_value,
                &options,
            )
            .unwrap();
        assert_eq!(bytes, decoded);
        Ok(())
    }

    #[test]
    fn codec_pcodec_round_trip_u16() {
        codec_pcodec_round_trip_impl(&valid_codec(), data_type::uint16(), 0u16).unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_u32() {
        codec_pcodec_round_trip_impl(&valid_codec(), data_type::uint32(), 0u32).unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_u64() {
        codec_pcodec_round_trip_impl(&valid_codec(), data_type::uint64(), 0u64).unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_i16() {
        codec_pcodec_round_trip_impl(&valid_codec(), data_type::int16(), 0i16).unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_i32() {
        codec_pcodec_round_trip_impl(&valid_codec(), data_type::int32(), 0i32).unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_i64() {
        codec_pcodec_round_trip_impl(&valid_codec(), data_type::int64(), 0i64).unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_f16() {
        codec_pcodec_round_trip_impl(
            &valid_codec(),
            data_type::float16(),
            half::f16::from_f32(0.0),
        )
        .unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_f32() {
        codec_pcodec_round_trip_impl(&valid_codec(), data_type::float32(), 0f32).unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_f64() {
        codec_pcodec_round_trip_impl(&valid_codec(), data_type::float64(), 0f64).unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_complex_float16() {
        codec_pcodec_round_trip_impl(
            &valid_codec(),
            data_type::complex_float16(),
            num::complex::Complex::<half::f16>::new(
                half::f16::from_f32(0f32),
                half::f16::from_f32(0f32),
            ),
        )
        .unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_complex_float32() {
        codec_pcodec_round_trip_impl(
            &valid_codec(),
            data_type::complex_float32(),
            num::complex::Complex::<f32>::new(0f32, 0f32),
        )
        .unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_complex_float64() {
        codec_pcodec_round_trip_impl(
            &valid_codec(),
            data_type::complex_float64(),
            num::complex::Complex::<f64>::new(0f64, 0f64),
        )
        .unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_complex64() {
        codec_pcodec_round_trip_impl(
            &valid_codec(),
            data_type::complex64(),
            num::complex::Complex32::new(0f32, 0f32),
        )
        .unwrap();
    }

    #[test]
    fn codec_pcodec_round_trip_complex128() {
        codec_pcodec_round_trip_impl(
            &valid_codec(),
            data_type::complex128(),
            num::complex::Complex64::new(0f64, 0f64),
        )
        .unwrap();
    }

    /// The round trip is expected to report an error for `uint8`.
    #[test]
    fn codec_pcodec_round_trip_u8() {
        assert!(codec_pcodec_round_trip_impl(&valid_codec(), data_type::uint8(), 0u8).is_err());
    }

    /// Partially decode rows 1..3 of column 0 from an encoded 4x4 `u32` chunk holding `0..16`.
    #[test]
    fn codec_pcodec_partial_decode() {
        let chunk_shape: ChunkShape = vec![NonZeroU64::new(4).unwrap(); 2];
        let data_type = data_type::uint32();
        let fill_value = FillValue::from(0u32);
        let options = CodecOptions::default();
        let elements: Vec<u32> = (0..chunk_shape.num_elements_usize() as u32).collect();
        let bytes: ArrayBytes = transmute_to_bytes_vec(elements).into();

        let codec = Arc::new(valid_codec());

        let encoded = codec
            .encode(bytes, &chunk_shape, &data_type, &fill_value, &options)
            .unwrap();
        let decoded_region = ArraySubset::new_with_ranges(&[1..3, 0..1]);
        let input_handle = Arc::new(encoded);
        let partial_decoder = codec
            .partial_decoder(
                input_handle.clone(),
                &chunk_shape,
                &data_type,
                &fill_value,
                &options,
            )
            .unwrap();
        // The pcodec partial decoder does not hold bytes of its own, so its reported size
        // matches that of the encoded input handle.
        assert_eq!(partial_decoder.size_held(), input_handle.size_held());

        let decoded_partial_chunk: Vec<u8> = partial_decoder
            .partial_decode(&decoded_region, &options)
            .unwrap()
            .into_fixed()
            .unwrap()
            .to_vec();
        // Elements (1, 0) and (2, 0) of a row-major 4x4 chunk holding 0..16.
        let answer: Vec<u32> = vec![4, 8];
        assert_eq!(transmute_to_bytes_vec(answer), decoded_partial_chunk);
    }

    /// Async counterpart of `codec_pcodec_partial_decode`.
    #[cfg(feature = "async")]
    #[tokio::test]
    async fn codec_pcodec_async_partial_decode() {
        let chunk_shape: ChunkShape = vec![NonZeroU64::new(4).unwrap(); 2];
        let data_type = data_type::uint32();
        let fill_value = FillValue::from(0u32);
        let options = CodecOptions::default();
        let elements: Vec<u32> = (0..chunk_shape.num_elements_usize() as u32).collect();
        let bytes: ArrayBytes = transmute_to_bytes_vec(elements).into();

        let codec = Arc::new(valid_codec());

        let encoded = codec
            .encode(bytes, &chunk_shape, &data_type, &fill_value, &options)
            .unwrap();
        let decoded_region = ArraySubset::new_with_ranges(&[1..3, 0..1]);
        let input_handle = Arc::new(encoded);
        let partial_decoder = codec
            .async_partial_decoder(
                input_handle,
                &chunk_shape,
                &data_type,
                &fill_value,
                &options,
            )
            .await
            .unwrap();

        let decoded_partial_chunk: Vec<u8> = partial_decoder
            .partial_decode(&decoded_region, &options)
            .await
            .unwrap()
            .into_fixed()
            .unwrap()
            .to_vec();
        // Elements (1, 0) and (2, 0) of a row-major 4x4 chunk holding 0..16.
        let answer: Vec<u32> = vec![4, 8];
        assert_eq!(transmute_to_bytes_vec(answer), decoded_partial_chunk);
    }
}