Skip to main content

zarrs/array/codec/array_to_bytes/
packbits.rs

1//! The `packbits` array to bytes codec.
2//!
3//! Packs together values with non-byte-aligned sizes.
4//!
5//! ### Specification
6//! - <https://github.com/zarr-developers/zarr-extensions/blob/8a28c319023598d40b9a5b5a0dae0a446d497520/codecs/packbits/README.md>
7//!
8//! ### Codec `name` Aliases (Zarr V3)
9//! - `packbits`
10//!
11//! ### Codec `id` Aliases (Zarr V2)
12//! None
13//!
14//! ### Codec `configuration` Example - [`PackBitsCodecConfiguration`]:
15//! ```rust
16//! # let JSON = r#"
17//! {
18//!     "padding_encoding": "first_byte",
19//!     "first_bit": null,
20//!     "last_bit": null
21//! }
22//! # "#;
23//! # use zarrs::metadata_ext::codec::packbits::PackBitsCodecConfiguration;
24//! # serde_json::from_str::<PackBitsCodecConfiguration>(JSON).unwrap();
25//! ```
26
27mod data_type_extension_packbits_codec;
28mod packbits_codec;
29mod packbits_partial_decoder;
30
31use std::sync::Arc;
32
33use num::Integer;
34pub use packbits_codec::PackBitsCodec;
35use zarrs_metadata::v3::MetadataV3;
36
37use crate::array::DataType;
38use zarrs_codec::{Codec, CodecPluginV3, CodecTraitsV3};
39pub use zarrs_metadata_ext::codec::packbits::{
40    PackBitsCodecConfiguration, PackBitsCodecConfigurationV1,
41};
42use zarrs_plugin::PluginCreateError;
43
44zarrs_plugin::impl_extension_aliases!(PackBitsCodec, v3: "packbits");
45
46// Register the V3 codec.
47inventory::submit! {
48    CodecPluginV3::new::<PackBitsCodec>()
49}
50
51impl CodecTraitsV3 for PackBitsCodec {
52    fn create(metadata: &MetadataV3) -> Result<Codec, PluginCreateError> {
53        let configuration: PackBitsCodecConfiguration = metadata.to_typed_configuration()?;
54        let codec = Arc::new(PackBitsCodec::new_with_configuration(&configuration)?);
55        Ok(Codec::ArrayToBytes(codec))
56    }
57}
58
59// Re-export extension trait from zarrs_data_type
60pub use zarrs_data_type::codec_traits::packbits::{
61    PackBitsDataTypeExt, PackBitsDataTypePlugin, PackBitsDataTypeTraits,
62    impl_pack_bits_data_type_traits,
63};
64
/// Snapshot of the values a data type reports through its `packbits` codec traits.
///
/// Filled in by `pack_bits_components`.
struct PackBitsCodecComponents {
    /// Value returned by the data type's `component_size_bits()`.
    pub component_size_bits: u64,
    /// Value returned by the data type's `num_components()`.
    pub num_components: u64,
    /// Value returned by the data type's `sign_extension()`.
    pub sign_extension: bool,
}
70
71fn pack_bits_components(
72    data_type: &DataType,
73) -> Result<PackBitsCodecComponents, zarrs_data_type::DataTypeCodecError> {
74    let packbits = data_type.codec_packbits()?;
75    Ok(PackBitsCodecComponents {
76        component_size_bits: packbits.component_size_bits(),
77        num_components: packbits.num_components(),
78        sign_extension: packbits.sign_extension(),
79    })
80}
81
82fn div_rem_8bit(bit: u64, element_size_bits: u64) -> (u64, u8) {
83    let (element, element_bit) = bit.div_rem(&element_size_bits);
84    let element_size_bits_padded = 8 * element_size_bits.div_ceil(8);
85    let byte = (element * element_size_bits_padded + element_bit) / 8;
86    let byte_bit = (element_bit % 8) as u8;
87    (byte, byte_bit)
88}
89
90#[cfg(test)]
91mod tests {
92    use std::num::NonZeroU64;
93    use std::sync::Arc;
94
95    use num::Integer;
96    use zarrs_data_type::FillValue;
97
98    use crate::array::codec::BytesCodec;
99    use crate::array::element::{Element, ElementOwned};
100    use crate::array::{ArrayBytes, ArraySubset, data_type};
101    use zarrs_codec::{ArrayToBytesCodecTraits, BytesPartialDecoderTraits, CodecOptions};
102    use zarrs_metadata_ext::codec::packbits::PackBitsPaddingEncoding;
103
104    #[test]
105    fn div_rem_8bit() {
106        use super::div_rem_8bit;
107
108        assert_eq!(div_rem_8bit(0, 1), (0, 0));
109        assert_eq!(div_rem_8bit(1, 1), (1, 0));
110        assert_eq!(div_rem_8bit(2, 1), (2, 0));
111
112        assert_eq!(div_rem_8bit(0, 3), (0, 0));
113        assert_eq!(div_rem_8bit(1, 3), (0, 1));
114        assert_eq!(div_rem_8bit(2, 3), (0, 2));
115        assert_eq!(div_rem_8bit(3, 3), (1, 0));
116        assert_eq!(div_rem_8bit(4, 3), (1, 1));
117        assert_eq!(div_rem_8bit(5, 3), (1, 2));
118
119        assert_eq!(div_rem_8bit(0, 12), (0, 0));
120        assert_eq!(div_rem_8bit(7, 12), (0, 7));
121        assert_eq!(div_rem_8bit(8, 12), (1, 0));
122        assert_eq!(div_rem_8bit(9, 12), (1, 1));
123        assert_eq!(div_rem_8bit(10, 12), (1, 2));
124        assert_eq!(div_rem_8bit(11, 12), (1, 3));
125        assert_eq!(div_rem_8bit(12, 12), (2, 0));
126        assert_eq!(div_rem_8bit(13, 12), (2, 1));
127    }
128
129    #[test]
130    fn codec_packbits_bool() -> Result<(), Box<dyn std::error::Error>> {
131        for encoding in [
132            PackBitsPaddingEncoding::None,
133            PackBitsPaddingEncoding::FirstByte,
134            PackBitsPaddingEncoding::LastByte,
135        ] {
136            let codec = Arc::new(super::PackBitsCodec::new(encoding, None, None).unwrap());
137            let chunk_shape = vec![NonZeroU64::new(8).unwrap(), NonZeroU64::new(5).unwrap()];
138            let data_type = data_type::bool();
139            let fill_value = FillValue::from(false);
140
141            let elements: Vec<bool> = (0..40).map(|i| i % 3 == 0).collect();
142            let bytes = bool::into_array_bytes(&data_type, elements)?.into_owned();
143            // T F F T F
144            // F T F F T
145            // F F T F F
146            // T F F T F
147            // ...
148
149            // Encoding
150            let encoded = codec.encode(
151                bytes.clone(),
152                &chunk_shape,
153                &data_type,
154                &fill_value,
155                &CodecOptions::default(),
156            )?;
157            assert!((encoded.len() as u64) <= 40.div_ceil(&8) + 1);
158
159            // Decoding
160            let decoded = codec
161                .decode(
162                    encoded.clone(),
163                    &chunk_shape,
164                    &data_type,
165                    &fill_value,
166                    &CodecOptions::default(),
167                )
168                .unwrap();
169            assert_eq!(bytes, decoded);
170
171            // Partial decoding
172            let decoded_region = ArraySubset::new_with_ranges(&[1..4, 1..4]);
173            let input_handle = Arc::new(encoded);
174            let partial_decoder = codec
175                .partial_decoder(
176                    input_handle.clone(),
177                    &chunk_shape,
178                    &data_type,
179                    &fill_value,
180                    &CodecOptions::default(),
181                )
182                .unwrap();
183            assert_eq!(partial_decoder.size_held(), input_handle.size_held()); // packbits partial decoder does not hold bytes
184            let decoded_partial_chunk = partial_decoder
185                .partial_decode(&decoded_region, &CodecOptions::default())
186                .unwrap();
187            let decoded_partial_chunk =
188                bool::from_array_bytes(&data_type, decoded_partial_chunk).unwrap();
189            let answer: Vec<bool> =
190                vec![true, false, false, false, true, false, false, false, true];
191            assert_eq!(answer, decoded_partial_chunk);
192        }
193        Ok(())
194    }
195
196    #[test]
197    fn codec_packbits_float32() -> Result<(), Box<dyn std::error::Error>> {
198        for encoding in [
199            PackBitsPaddingEncoding::None,
200            PackBitsPaddingEncoding::FirstByte,
201            PackBitsPaddingEncoding::LastByte,
202        ] {
203            let codec = Arc::new(super::PackBitsCodec::new(encoding, None, None).unwrap());
204            let chunk_shape = vec![NonZeroU64::new(8).unwrap(), NonZeroU64::new(5).unwrap()];
205            let data_type = data_type::float32();
206            let fill_value = FillValue::from(0.0f32);
207
208            let elements: Vec<f32> = (0..40).map(|i| i as f32).collect();
209            let bytes = f32::to_array_bytes(&data_type, &elements)?.into_owned();
210
211            // Encoding
212            let encoded = codec.encode(
213                bytes.clone(),
214                &chunk_shape,
215                &data_type,
216                &fill_value,
217                &CodecOptions::default(),
218            )?;
219            assert!((encoded.len() as u64) <= (40 * 32).div_ceil(&8) + 1);
220
221            // Decoding
222            let decoded = codec
223                .decode(
224                    encoded.clone(),
225                    &chunk_shape,
226                    &data_type,
227                    &fill_value,
228                    &CodecOptions::default(),
229                )
230                .unwrap();
231            assert_eq!(bytes, decoded);
232
233            // Check it matches little endian bytes
234            let decoded = BytesCodec::little()
235                .decode(
236                    encoded.clone(),
237                    &chunk_shape,
238                    &data_type,
239                    &fill_value,
240                    &CodecOptions::default(),
241                )
242                .unwrap();
243            assert_eq!(bytes, decoded);
244        }
245        Ok(())
246    }
247
248    #[test]
249    fn codec_packbits_int16() -> Result<(), Box<dyn std::error::Error>> {
250        for last_bit in 11..15 {
251            for first_bit in 0..4 {
252                for encoding in [
253                    PackBitsPaddingEncoding::None,
254                    PackBitsPaddingEncoding::FirstByte,
255                    PackBitsPaddingEncoding::LastByte,
256                ] {
257                    let codec = Arc::new(
258                        super::PackBitsCodec::new(encoding, Some(first_bit), Some(last_bit))
259                            .unwrap(),
260                    );
261                    let chunk_shape =
262                        vec![NonZeroU64::new(8).unwrap(), NonZeroU64::new(5).unwrap()];
263                    let data_type = data_type::int16();
264                    let fill_value = FillValue::from(0i16);
265                    let elements: Vec<i16> = (-20..20).map(|i| (i as i16) << first_bit).collect();
266                    let bytes = i16::to_array_bytes(&data_type, &elements)?.into_owned();
267
268                    // Encoding
269                    let encoded = codec.encode(
270                        bytes.clone(),
271                        &chunk_shape,
272                        &data_type,
273                        &fill_value,
274                        &CodecOptions::default(),
275                    )?;
276                    assert!(
277                        (encoded.len() as u64) <= (40 * (last_bit - first_bit + 1)).div_ceil(8) + 1
278                    );
279
280                    // Decoding
281                    let decoded = codec
282                        .decode(
283                            encoded.clone(),
284                            &chunk_shape,
285                            &data_type,
286                            &fill_value,
287                            &CodecOptions::default(),
288                        )
289                        .unwrap();
290                    assert_eq!(elements, i16::from_array_bytes(&data_type, decoded)?);
291                }
292            }
293        }
294        Ok(())
295    }
296
297    #[test]
298    fn codec_packbits_uint2() -> Result<(), Box<dyn std::error::Error>> {
299        for encoding in [
300            PackBitsPaddingEncoding::None,
301            PackBitsPaddingEncoding::FirstByte,
302            PackBitsPaddingEncoding::LastByte,
303        ] {
304            let codec = Arc::new(super::PackBitsCodec::new(encoding, None, None).unwrap());
305            let chunk_shape = vec![NonZeroU64::new(4).unwrap(), NonZeroU64::new(1).unwrap()];
306            let data_type = data_type::uint2();
307            let fill_value = FillValue::from(0u8);
308
309            let elements: Vec<u8> = (0..4).map(|i| i as u8).collect();
310            let bytes = u8::to_array_bytes(&data_type, &elements)?.into_owned();
311
312            // Encoding
313            let encoded = codec.encode(
314                bytes.clone(),
315                &chunk_shape,
316                &data_type,
317                &fill_value,
318                &CodecOptions::default(),
319            )?;
320            assert!((encoded.len() as u64) <= (4 * 4).div_ceil(&8) + 1);
321
322            // Decoding
323            let decoded = codec
324                .decode(
325                    encoded.clone(),
326                    &chunk_shape,
327                    &data_type,
328                    &fill_value,
329                    &CodecOptions::default(),
330                )
331                .unwrap();
332            assert_eq!(elements, u8::from_array_bytes(&data_type, decoded)?);
333        }
334        Ok(())
335    }
336
337    #[test]
338    fn codec_packbits_uint4() -> Result<(), Box<dyn std::error::Error>> {
339        for encoding in [
340            PackBitsPaddingEncoding::None,
341            PackBitsPaddingEncoding::FirstByte,
342            PackBitsPaddingEncoding::LastByte,
343        ] {
344            let codec = Arc::new(super::PackBitsCodec::new(encoding, None, None).unwrap());
345            let chunk_shape = vec![NonZeroU64::new(16).unwrap(), NonZeroU64::new(1).unwrap()];
346            let data_type = data_type::uint4();
347            let fill_value = FillValue::from(0u8);
348
349            let elements: Vec<u8> = (0..16).map(|i| i as u8).collect();
350            let bytes = u8::to_array_bytes(&data_type, &elements)?.into_owned();
351
352            // Encoding
353            let encoded = codec.encode(
354                bytes.clone(),
355                &chunk_shape,
356                &data_type,
357                &fill_value,
358                &CodecOptions::default(),
359            )?;
360            assert!((encoded.len() as u64) <= (4 * 16).div_ceil(&8) + 1);
361
362            // Decoding
363            let decoded = codec
364                .decode(
365                    encoded.clone(),
366                    &chunk_shape,
367                    &data_type,
368                    &fill_value,
369                    &CodecOptions::default(),
370                )
371                .unwrap();
372            assert_eq!(elements, u8::from_array_bytes(&data_type, decoded)?);
373        }
374        Ok(())
375    }
376
377    #[test]
378    fn codec_packbits_int2() -> Result<(), Box<dyn std::error::Error>> {
379        for encoding in [
380            PackBitsPaddingEncoding::None,
381            PackBitsPaddingEncoding::FirstByte,
382            PackBitsPaddingEncoding::LastByte,
383        ] {
384            let codec = Arc::new(super::PackBitsCodec::new(encoding, None, None).unwrap());
385            let chunk_shape = vec![NonZeroU64::new(4).unwrap(), NonZeroU64::new(1).unwrap()];
386            let data_type = data_type::int2();
387            let fill_value = FillValue::from(0i8);
388
389            let elements: Vec<i8> = (-2..2).map(|i| i as i8).collect();
390            let bytes = i8::to_array_bytes(&data_type, &elements)?.into_owned();
391
392            // Encoding
393            let encoded = codec.encode(
394                bytes.clone(),
395                &chunk_shape,
396                &data_type,
397                &fill_value,
398                &CodecOptions::default(),
399            )?;
400            assert!((encoded.len() as u64) <= (4 * 4).div_ceil(&8) + 1);
401
402            // Decoding
403            let decoded = codec
404                .decode(
405                    encoded.clone(),
406                    &chunk_shape,
407                    &data_type,
408                    &fill_value,
409                    &CodecOptions::default(),
410                )
411                .unwrap();
412            assert_eq!(elements, i8::from_array_bytes(&data_type, decoded)?);
413        }
414        Ok(())
415    }
416
417    #[test]
418    fn codec_packbits_int4() -> Result<(), Box<dyn std::error::Error>> {
419        for encoding in [
420            PackBitsPaddingEncoding::None,
421            PackBitsPaddingEncoding::FirstByte,
422            PackBitsPaddingEncoding::LastByte,
423        ] {
424            let codec = Arc::new(super::PackBitsCodec::new(encoding, None, None).unwrap());
425            let chunk_shape = vec![NonZeroU64::new(16).unwrap(), NonZeroU64::new(1).unwrap()];
426            let data_type = data_type::int4();
427            let fill_value = FillValue::from(0i8);
428
429            let elements: Vec<i8> = (-8..8).map(|i| i as i8).collect();
430            let bytes = i8::to_array_bytes(&data_type, &elements)?.into_owned();
431
432            // Encoding
433            let encoded = codec.encode(
434                bytes.clone(),
435                &chunk_shape,
436                &data_type,
437                &fill_value,
438                &CodecOptions::default(),
439            )?;
440            assert!((encoded.len() as u64) <= (4 * 16).div_ceil(&8) + 1);
441
442            // Decoding
443            let decoded = codec
444                .decode(
445                    encoded.clone(),
446                    &chunk_shape,
447                    &data_type,
448                    &fill_value,
449                    &CodecOptions::default(),
450                )
451                .unwrap();
452            assert_eq!(elements, i8::from_array_bytes(&data_type, decoded)?);
453        }
454        Ok(())
455    }
456
457    #[test]
458    fn codec_packbits_float4_e2m1fn() -> Result<(), Box<dyn std::error::Error>> {
459        for encoding in [
460            PackBitsPaddingEncoding::None,
461            PackBitsPaddingEncoding::FirstByte,
462            PackBitsPaddingEncoding::LastByte,
463        ] {
464            let codec = Arc::new(super::PackBitsCodec::new(encoding, None, None).unwrap());
465            let chunk_shape = vec![NonZeroU64::new(16).unwrap(), NonZeroU64::new(1).unwrap()];
466            let data_type = data_type::float4_e2m1fn();
467            let fill_value = FillValue::from(0u8);
468
469            let bytes = ArrayBytes::new_flen((0..16).map(|i| i as u8).collect::<Vec<u8>>());
470
471            // Encoding
472            let encoded = codec.encode(
473                bytes.clone(),
474                &chunk_shape,
475                &data_type,
476                &fill_value,
477                &CodecOptions::default(),
478            )?;
479            assert!((encoded.len() as u64) <= (4 * 16).div_ceil(&8) + 1);
480
481            // Decoding
482            let decoded = codec
483                .decode(
484                    encoded.clone(),
485                    &chunk_shape,
486                    &data_type,
487                    &fill_value,
488                    &CodecOptions::default(),
489                )
490                .unwrap();
491            assert_eq!(bytes, decoded);
492        }
493        Ok(())
494    }
495
496    #[test]
497    fn codec_packbits_float6_e2m3fn() -> Result<(), Box<dyn std::error::Error>> {
498        for encoding in [
499            PackBitsPaddingEncoding::None,
500            PackBitsPaddingEncoding::FirstByte,
501            PackBitsPaddingEncoding::LastByte,
502        ] {
503            let codec = Arc::new(super::PackBitsCodec::new(encoding, None, None).unwrap());
504            let chunk_shape = vec![NonZeroU64::new(64).unwrap(), NonZeroU64::new(1).unwrap()];
505            let data_type = data_type::float6_e2m3fn();
506            let fill_value = FillValue::from(0u8);
507
508            let bytes = ArrayBytes::new_flen((0..64).map(|i| i as u8).collect::<Vec<u8>>());
509
510            // Encoding
511            let encoded = codec.encode(
512                bytes.clone(),
513                &chunk_shape,
514                &data_type,
515                &fill_value,
516                &CodecOptions::default(),
517            )?;
518            assert!((encoded.len() as u64) <= (6 * 64).div_ceil(&8) + 1);
519
520            // Decoding
521            let decoded = codec
522                .decode(
523                    encoded.clone(),
524                    &chunk_shape,
525                    &data_type,
526                    &fill_value,
527                    &CodecOptions::default(),
528                )
529                .unwrap();
530            assert_eq!(bytes, decoded);
531        }
532        Ok(())
533    }
534
535    #[test]
536    fn codec_packbits_float6_e3m2fn() -> Result<(), Box<dyn std::error::Error>> {
537        for encoding in [
538            PackBitsPaddingEncoding::None,
539            PackBitsPaddingEncoding::FirstByte,
540            PackBitsPaddingEncoding::LastByte,
541        ] {
542            let codec = Arc::new(super::PackBitsCodec::new(encoding, None, None).unwrap());
543            let chunk_shape = vec![NonZeroU64::new(64).unwrap(), NonZeroU64::new(1).unwrap()];
544            let data_type = data_type::float6_e3m2fn();
545            let fill_value = FillValue::from(0u8);
546
547            let bytes = ArrayBytes::new_flen((0..64).map(|i| i as u8).collect::<Vec<u8>>());
548
549            // Encoding
550            let encoded = codec.encode(
551                bytes.clone(),
552                &chunk_shape,
553                &data_type,
554                &fill_value,
555                &CodecOptions::default(),
556            )?;
557            assert!((encoded.len() as u64) <= (6 * 64).div_ceil(&8) + 1);
558
559            // Decoding
560            let decoded = codec
561                .decode(
562                    encoded.clone(),
563                    &chunk_shape,
564                    &data_type,
565                    &fill_value,
566                    &CodecOptions::default(),
567                )
568                .unwrap();
569            assert_eq!(bytes, decoded);
570        }
571        Ok(())
572    }
573}