zarrs/array/codec/array_to_bytes/
bytes.rs

1//! The `bytes` array to bytes codec (Core).
2//!
3//! Encodes arrays of fixed-size numeric data types as little endian or big endian in lexicographical order.
4//!
5//! ### Compatible Implementations:
6//! This is a core codec and should be compatible with all Zarr V3 implementations that support it.
7//!
8//! ### Specification
9//! - <https://zarr-specs.readthedocs.io/en/latest/v3/codecs/bytes/index.html>
10//! - <https://github.com/zarr-developers/zarr-extensions/tree/main/codecs/bytes>
11//!
12//! ### Specification Deviations
13//! The `bytes` specification defines a fixed set of supported data types, whereas the `bytes` codec in `zarrs` supports any fixed size data type that implements the [`DataTypeExtensionBytesCodec`](zarrs_data_type::DataTypeExtensionBytesCodec) trait.
14//!
15//! ### Codec `name` Aliases (Zarr V3)
16//! - `bytes`
17//!
18//! ### Codec `id` Aliases (Zarr V2)
19//! None
20//!
21//! ### Codec `configuration` Example - [`BytesCodecConfiguration`]:
22//! ```rust
23//! # let JSON = r#"
24//! {
25//!     "endian": "little"
26//! }
27//! # "#;
28//! # use zarrs_metadata_ext::codec::bytes::BytesCodecConfiguration;
29//! # serde_json::from_str::<BytesCodecConfiguration>(JSON).unwrap();
30//! ```
31
32mod bytes_codec;
33mod bytes_partial_decoder;
34
35use std::sync::Arc;
36
37use crate::metadata::Endianness;
38
39pub use zarrs_metadata_ext::codec::bytes::{BytesCodecConfiguration, BytesCodecConfigurationV1};
40use zarrs_registry::codec::BYTES;
41
42pub use bytes_codec::BytesCodec;
43
44#[cfg(feature = "async")]
45pub(crate) use bytes_partial_decoder::AsyncBytesPartialDecoder;
46pub(crate) use bytes_partial_decoder::BytesPartialDecoder;
47
48use crate::{
49    array::{
50        codec::{Codec, CodecPlugin},
51        DataType,
52    },
53    metadata::v3::MetadataV3,
54    plugin::{PluginCreateError, PluginMetadataInvalidError},
55};
56
// Register the codec.
// Submits a `CodecPlugin` built from the `BYTES` identifier, the identifier matcher
// (`is_identifier_bytes`), and the codec constructor (`create_codec_bytes`).
inventory::submit! {
    CodecPlugin::new(BYTES, is_identifier_bytes, create_codec_bytes)
}
61
62fn is_identifier_bytes(identifier: &str) -> bool {
63    identifier == BYTES
64}
65
66pub(crate) fn create_codec_bytes(metadata: &MetadataV3) -> Result<Codec, PluginCreateError> {
67    let configuration: BytesCodecConfiguration = metadata
68        .to_configuration()
69        .map_err(|_| PluginMetadataInvalidError::new(BYTES, "codec", metadata.to_string()))?;
70    let codec = Arc::new(BytesCodec::new_with_configuration(&configuration)?);
71    Ok(Codec::ArrayToBytes(codec))
72}
73
74/// Reverse the endianness of bytes for a given data type.
75pub(crate) fn reverse_endianness(v: &mut [u8], data_type: &DataType) {
76    match data_type {
77        DataType::Bool
78        | DataType::Int2
79        | DataType::Int4
80        | DataType::Int8
81        | DataType::UInt2
82        | DataType::UInt4
83        | DataType::UInt8
84        | DataType::Float4E2M1FN
85        | DataType::Float6E2M3FN
86        | DataType::Float6E3M2FN
87        | DataType::Float8E3M4
88        | DataType::Float8E4M3
89        | DataType::Float8E4M3B11FNUZ
90        | DataType::Float8E4M3FNUZ
91        | DataType::Float8E5M2
92        | DataType::Float8E5M2FNUZ
93        | DataType::Float8E8M0FNU
94        | DataType::ComplexFloat4E2M1FN
95        | DataType::ComplexFloat6E2M3FN
96        | DataType::ComplexFloat6E3M2FN
97        | DataType::ComplexFloat8E3M4
98        | DataType::ComplexFloat8E4M3
99        | DataType::ComplexFloat8E4M3B11FNUZ
100        | DataType::ComplexFloat8E4M3FNUZ
101        | DataType::ComplexFloat8E5M2
102        | DataType::ComplexFloat8E5M2FNUZ
103        | DataType::ComplexFloat8E8M0FNU
104        | DataType::RawBits(_) => {}
105        DataType::Int16
106        | DataType::UInt16
107        | DataType::Float16
108        | DataType::BFloat16
109        | DataType::ComplexFloat16
110        | DataType::ComplexBFloat16 => {
111            let swap = |chunk: &mut [u8]| {
112                let bytes = u16::from_ne_bytes(unsafe { chunk.try_into().unwrap_unchecked() });
113                chunk.copy_from_slice(bytes.swap_bytes().to_ne_bytes().as_slice());
114            };
115            v.chunks_exact_mut(2).for_each(swap);
116        }
117        DataType::Int32
118        | DataType::UInt32
119        | DataType::Float32
120        | DataType::Complex64
121        | DataType::ComplexFloat32 => {
122            let swap = |chunk: &mut [u8]| {
123                let bytes = u32::from_ne_bytes(unsafe { chunk.try_into().unwrap_unchecked() });
124                chunk.copy_from_slice(bytes.swap_bytes().to_ne_bytes().as_slice());
125            };
126            v.chunks_exact_mut(4).for_each(swap);
127        }
128        DataType::Int64
129        | DataType::UInt64
130        | DataType::Float64
131        | DataType::Complex128
132        | DataType::ComplexFloat64
133        | DataType::NumpyDateTime64 {
134            unit: _,
135            scale_factor: _,
136        }
137        | DataType::NumpyTimeDelta64 {
138            unit: _,
139            scale_factor: _,
140        } => {
141            let swap = |chunk: &mut [u8]| {
142                let bytes = u64::from_ne_bytes(unsafe { chunk.try_into().unwrap_unchecked() });
143                chunk.copy_from_slice(bytes.swap_bytes().to_ne_bytes().as_slice());
144            };
145            v.chunks_exact_mut(8).for_each(swap);
146        }
147        // Variable-sized data types and extensions are not supported and are rejected outside of this function
148        DataType::Extension(_) | DataType::String | DataType::Bytes => {
149            unreachable!()
150        }
151    }
152}
153
154#[cfg(test)]
155mod tests {
156    use std::{num::NonZeroU64, sync::Arc};
157
158    use crate::{
159        array::{
160            codec::{ArrayToBytesCodecTraits, CodecOptions, CodecTraits},
161            ArrayBytes, ChunkRepresentation, ChunkShape, Endianness, FillValue,
162        },
163        array_subset::ArraySubset,
164    };
165
166    use super::*;
167
168    #[test]
169    fn codec_bytes_configuration_big() {
170        let codec_configuration: BytesCodecConfiguration =
171            serde_json::from_str(r#"{"endian":"big"}"#).unwrap();
172        let codec = BytesCodec::new_with_configuration(&codec_configuration).unwrap();
173        let configuration = codec.configuration(BYTES).unwrap();
174        assert_eq!(
175            serde_json::to_string(&configuration).unwrap(),
176            r#"{"endian":"big"}"#
177        );
178    }
179
180    #[test]
181    fn codec_bytes_configuration_little() {
182        let codec_configuration: BytesCodecConfiguration =
183            serde_json::from_str(r#"{"endian":"little"}"#).unwrap();
184        let codec = BytesCodec::new_with_configuration(&codec_configuration).unwrap();
185        let configuration = codec.configuration(BYTES).unwrap();
186        assert_eq!(
187            serde_json::to_string(&configuration).unwrap(),
188            r#"{"endian":"little"}"#
189        );
190    }
191
192    #[test]
193    fn codec_bytes_configuration_none() {
194        let codec_configuration: BytesCodecConfiguration = serde_json::from_str(r#"{}"#).unwrap();
195        let codec = BytesCodec::new_with_configuration(&codec_configuration).unwrap();
196        let configuration = codec.configuration(BYTES).unwrap();
197        assert_eq!(serde_json::to_string(&configuration).unwrap(), r#"{}"#);
198    }
199
200    fn codec_bytes_round_trip_impl(
201        endianness: Option<Endianness>,
202        data_type: DataType,
203        fill_value: FillValue,
204    ) -> Result<(), Box<dyn std::error::Error>> {
205        let chunk_shape = vec![NonZeroU64::new(10).unwrap(), NonZeroU64::new(10).unwrap()];
206        let chunk_representation =
207            ChunkRepresentation::new(chunk_shape, data_type, fill_value).unwrap();
208        let size = chunk_representation.num_elements_usize()
209            * chunk_representation.data_type().fixed_size().unwrap();
210        let bytes: ArrayBytes = (0..size).map(|s| s as u8).collect::<Vec<_>>().into();
211
212        let codec = BytesCodec::new(endianness);
213
214        let encoded = codec.encode(
215            bytes.clone(),
216            &chunk_representation,
217            &CodecOptions::default(),
218        )?;
219        let decoded = codec
220            .decode(encoded, &chunk_representation, &CodecOptions::default())
221            .unwrap();
222        assert_eq!(bytes, decoded);
223        Ok(())
224    }
225
226    #[test]
227    fn codec_bytes_round_trip_f32() {
228        codec_bytes_round_trip_impl(
229            Some(Endianness::Big),
230            DataType::Float32,
231            FillValue::from(0.0f32),
232        )
233        .unwrap();
234        codec_bytes_round_trip_impl(
235            Some(Endianness::Little),
236            DataType::Float32,
237            FillValue::from(0.0f32),
238        )
239        .unwrap();
240    }
241
242    #[test]
243    fn codec_bytes_round_trip_u32() {
244        codec_bytes_round_trip_impl(
245            Some(Endianness::Big),
246            DataType::UInt32,
247            FillValue::from(0u32),
248        )
249        .unwrap();
250        codec_bytes_round_trip_impl(
251            Some(Endianness::Little),
252            DataType::UInt32,
253            FillValue::from(0u32),
254        )
255        .unwrap();
256    }
257
258    #[test]
259    fn codec_bytes_round_trip_u16() {
260        codec_bytes_round_trip_impl(
261            Some(Endianness::Big),
262            DataType::UInt16,
263            FillValue::from(0u16),
264        )
265        .unwrap();
266        codec_bytes_round_trip_impl(
267            Some(Endianness::Little),
268            DataType::UInt16,
269            FillValue::from(0u16),
270        )
271        .unwrap();
272    }
273
274    #[test]
275    fn codec_bytes_round_trip_u8() {
276        codec_bytes_round_trip_impl(Some(Endianness::Big), DataType::UInt8, FillValue::from(0u8))
277            .unwrap();
278        codec_bytes_round_trip_impl(
279            Some(Endianness::Little),
280            DataType::UInt8,
281            FillValue::from(0u8),
282        )
283        .unwrap();
284        codec_bytes_round_trip_impl(None, DataType::UInt8, FillValue::from(0u8)).unwrap();
285    }
286
287    #[test]
288    fn codec_bytes_round_trip_i32() {
289        codec_bytes_round_trip_impl(Some(Endianness::Big), DataType::Int32, FillValue::from(0))
290            .unwrap();
291        codec_bytes_round_trip_impl(
292            Some(Endianness::Little),
293            DataType::Int32,
294            FillValue::from(0),
295        )
296        .unwrap();
297    }
298
299    #[test]
300    fn codec_bytes_round_trip_i32_endianness_none() {
301        assert!(codec_bytes_round_trip_impl(None, DataType::Int32, FillValue::from(0)).is_err());
302    }
303
304    #[test]
305    fn codec_bytes_round_trip_complex64() {
306        codec_bytes_round_trip_impl(
307            Some(Endianness::Big),
308            DataType::Complex64,
309            FillValue::from(num::complex::Complex32::new(0.0, 0.0)),
310        )
311        .unwrap();
312        codec_bytes_round_trip_impl(
313            Some(Endianness::Little),
314            DataType::Complex64,
315            FillValue::from(num::complex::Complex32::new(0.0, 0.0)),
316        )
317        .unwrap();
318    }
319
320    #[test]
321    fn codec_bytes_round_trip_complex128() {
322        codec_bytes_round_trip_impl(
323            Some(Endianness::Big),
324            DataType::Complex128,
325            FillValue::from(num::complex::Complex64::new(0.0, 0.0)),
326        )
327        .unwrap();
328        codec_bytes_round_trip_impl(
329            Some(Endianness::Little),
330            DataType::Complex128,
331            FillValue::from(num::complex::Complex64::new(0.0, 0.0)),
332        )
333        .unwrap();
334    }
335
336    #[test]
337    fn codec_bytes_partial_decode() {
338        let chunk_shape: ChunkShape = vec![4, 4].try_into().unwrap();
339        let chunk_representation =
340            ChunkRepresentation::new(chunk_shape.to_vec(), DataType::UInt8, FillValue::from(0u8))
341                .unwrap();
342        let elements: Vec<u8> = (0..chunk_representation.num_elements() as u8).collect();
343        let bytes: ArrayBytes = elements.into();
344
345        let codec = Arc::new(BytesCodec::new(None));
346
347        let encoded = codec
348            .encode(
349                bytes.clone(),
350                &chunk_representation,
351                &CodecOptions::default(),
352            )
353            .unwrap();
354        let decoded_regions = [ArraySubset::new_with_ranges(&[1..3, 0..1])];
355        let input_handle = Arc::new(std::io::Cursor::new(encoded));
356        let partial_decoder = codec
357            .partial_decoder(
358                input_handle,
359                &chunk_representation,
360                &CodecOptions::default(),
361            )
362            .unwrap();
363        let decoded_partial_chunk = partial_decoder
364            .partial_decode(&decoded_regions, &CodecOptions::default())
365            .unwrap();
366
367        let decoded_partial_chunk: Vec<u8> = decoded_partial_chunk
368            .into_iter()
369            .map(|bytes| bytes.into_fixed().unwrap().to_vec())
370            .flatten()
371            .collect::<Vec<_>>()
372            .chunks(size_of::<u8>())
373            .map(|b| u8::from_ne_bytes(b.try_into().unwrap()))
374            .collect();
375        let answer: Vec<u8> = vec![4, 8];
376        assert_eq!(answer, decoded_partial_chunk);
377    }
378
379    #[cfg(feature = "async")]
380    #[tokio::test]
381    async fn codec_bytes_async_partial_decode() {
382        let chunk_shape: ChunkShape = vec![4, 4].try_into().unwrap();
383        let chunk_representation =
384            ChunkRepresentation::new(chunk_shape.to_vec(), DataType::UInt8, FillValue::from(0u8))
385                .unwrap();
386        let elements: Vec<u8> = (0..chunk_representation.num_elements() as u8).collect();
387        let bytes: ArrayBytes = elements.into();
388
389        let codec = Arc::new(BytesCodec::new(None));
390
391        let encoded = codec
392            .encode(
393                bytes.clone(),
394                &chunk_representation,
395                &CodecOptions::default(),
396            )
397            .unwrap();
398        let decoded_regions = [ArraySubset::new_with_ranges(&[1..3, 0..1])];
399        let input_handle = Arc::new(std::io::Cursor::new(encoded));
400        let partial_decoder = codec
401            .async_partial_decoder(
402                input_handle,
403                &chunk_representation,
404                &CodecOptions::default(),
405            )
406            .await
407            .unwrap();
408        let decoded_partial_chunk = partial_decoder
409            .partial_decode(&decoded_regions, &CodecOptions::default())
410            .await
411            .unwrap();
412
413        let decoded_partial_chunk: Vec<u8> = decoded_partial_chunk
414            .into_iter()
415            .map(|bytes| bytes.into_fixed().unwrap().to_vec())
416            .flatten()
417            .collect::<Vec<_>>()
418            .chunks(size_of::<u8>())
419            .map(|b| u8::from_ne_bytes(b.try_into().unwrap()))
420            .collect();
421        let answer: Vec<u8> = vec![4, 8];
422        assert_eq!(answer, decoded_partial_chunk);
423    }
424}