zarrs/array/codec/array_to_bytes/
bytes.rs1mod bytes_codec;
33mod bytes_partial_decoder;
34
35use std::sync::Arc;
36
37use crate::metadata::Endianness;
38
39pub use zarrs_metadata_ext::codec::bytes::{BytesCodecConfiguration, BytesCodecConfigurationV1};
40use zarrs_registry::codec::BYTES;
41
42pub use bytes_codec::BytesCodec;
43
44#[cfg(feature = "async")]
45pub(crate) use bytes_partial_decoder::AsyncBytesPartialDecoder;
46pub(crate) use bytes_partial_decoder::BytesPartialDecoder;
47
48use crate::{
49 array::{
50 codec::{Codec, CodecPlugin},
51 DataType,
52 },
53 metadata::v3::MetadataV3,
54 plugin::{PluginCreateError, PluginMetadataInvalidError},
55};
56
57inventory::submit! {
59 CodecPlugin::new(BYTES, is_identifier_bytes, create_codec_bytes)
60}
61
62fn is_identifier_bytes(identifier: &str) -> bool {
63 identifier == BYTES
64}
65
66pub(crate) fn create_codec_bytes(metadata: &MetadataV3) -> Result<Codec, PluginCreateError> {
67 let configuration: BytesCodecConfiguration = metadata
68 .to_configuration()
69 .map_err(|_| PluginMetadataInvalidError::new(BYTES, "codec", metadata.to_string()))?;
70 let codec = Arc::new(BytesCodec::new_with_configuration(&configuration)?);
71 Ok(Codec::ArrayToBytes(codec))
72}
73
74pub(crate) fn reverse_endianness(v: &mut [u8], data_type: &DataType) {
76 match data_type {
77 DataType::Bool
78 | DataType::Int2
79 | DataType::Int4
80 | DataType::Int8
81 | DataType::UInt2
82 | DataType::UInt4
83 | DataType::UInt8
84 | DataType::Float4E2M1FN
85 | DataType::Float6E2M3FN
86 | DataType::Float6E3M2FN
87 | DataType::Float8E3M4
88 | DataType::Float8E4M3
89 | DataType::Float8E4M3B11FNUZ
90 | DataType::Float8E4M3FNUZ
91 | DataType::Float8E5M2
92 | DataType::Float8E5M2FNUZ
93 | DataType::Float8E8M0FNU
94 | DataType::ComplexFloat4E2M1FN
95 | DataType::ComplexFloat6E2M3FN
96 | DataType::ComplexFloat6E3M2FN
97 | DataType::ComplexFloat8E3M4
98 | DataType::ComplexFloat8E4M3
99 | DataType::ComplexFloat8E4M3B11FNUZ
100 | DataType::ComplexFloat8E4M3FNUZ
101 | DataType::ComplexFloat8E5M2
102 | DataType::ComplexFloat8E5M2FNUZ
103 | DataType::ComplexFloat8E8M0FNU
104 | DataType::RawBits(_) => {}
105 DataType::Int16
106 | DataType::UInt16
107 | DataType::Float16
108 | DataType::BFloat16
109 | DataType::ComplexFloat16
110 | DataType::ComplexBFloat16 => {
111 let swap = |chunk: &mut [u8]| {
112 let bytes = u16::from_ne_bytes(unsafe { chunk.try_into().unwrap_unchecked() });
113 chunk.copy_from_slice(bytes.swap_bytes().to_ne_bytes().as_slice());
114 };
115 v.chunks_exact_mut(2).for_each(swap);
116 }
117 DataType::Int32
118 | DataType::UInt32
119 | DataType::Float32
120 | DataType::Complex64
121 | DataType::ComplexFloat32 => {
122 let swap = |chunk: &mut [u8]| {
123 let bytes = u32::from_ne_bytes(unsafe { chunk.try_into().unwrap_unchecked() });
124 chunk.copy_from_slice(bytes.swap_bytes().to_ne_bytes().as_slice());
125 };
126 v.chunks_exact_mut(4).for_each(swap);
127 }
128 DataType::Int64
129 | DataType::UInt64
130 | DataType::Float64
131 | DataType::Complex128
132 | DataType::ComplexFloat64
133 | DataType::NumpyDateTime64 {
134 unit: _,
135 scale_factor: _,
136 }
137 | DataType::NumpyTimeDelta64 {
138 unit: _,
139 scale_factor: _,
140 } => {
141 let swap = |chunk: &mut [u8]| {
142 let bytes = u64::from_ne_bytes(unsafe { chunk.try_into().unwrap_unchecked() });
143 chunk.copy_from_slice(bytes.swap_bytes().to_ne_bytes().as_slice());
144 };
145 v.chunks_exact_mut(8).for_each(swap);
146 }
147 DataType::Extension(_) | DataType::String | DataType::Bytes => {
149 unreachable!()
150 }
151 }
152}
153
#[cfg(test)]
mod tests {
    use std::{num::NonZeroU64, sync::Arc};

    use crate::{
        array::{
            codec::{ArrayToBytesCodecTraits, CodecOptions, CodecTraits},
            ArrayBytes, ChunkRepresentation, ChunkShape, Endianness, FillValue,
        },
        array_subset::ArraySubset,
    };

    use super::*;

    /// Parse `json` as a codec configuration, build the codec from it, and check that the
    /// configuration reported by the codec serialises back to exactly `json`.
    fn assert_configuration_round_trips(json: &str) {
        let codec_configuration: BytesCodecConfiguration = serde_json::from_str(json).unwrap();
        let codec = BytesCodec::new_with_configuration(&codec_configuration).unwrap();
        let configuration = codec.configuration(BYTES).unwrap();
        assert_eq!(serde_json::to_string(&configuration).unwrap(), json);
    }

    #[test]
    fn codec_bytes_configuration_big() {
        assert_configuration_round_trips(r#"{"endian":"big"}"#);
    }

    #[test]
    fn codec_bytes_configuration_little() {
        assert_configuration_round_trips(r#"{"endian":"little"}"#);
    }

    #[test]
    fn codec_bytes_configuration_none() {
        assert_configuration_round_trips(r#"{}"#);
    }

    /// Encode then decode a 10x10 chunk of `data_type` with a codec using `endianness`,
    /// and check that the decoded bytes equal the input bytes.
    ///
    /// Encoding failures are returned as `Err`; decoding failures panic.
    fn codec_bytes_round_trip_impl(
        endianness: Option<Endianness>,
        data_type: DataType,
        fill_value: FillValue,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let chunk_shape = vec![NonZeroU64::new(10).unwrap(), NonZeroU64::new(10).unwrap()];
        let chunk_representation =
            ChunkRepresentation::new(chunk_shape, data_type, fill_value).unwrap();
        let size = chunk_representation.num_elements_usize()
            * chunk_representation.data_type().fixed_size().unwrap();
        // Wrapping to `u8` is intentional: the input is just a repeating byte pattern.
        let bytes: ArrayBytes = (0..size).map(|s| s as u8).collect::<Vec<_>>().into();

        let codec = BytesCodec::new(endianness);

        let encoded = codec.encode(
            bytes.clone(),
            &chunk_representation,
            &CodecOptions::default(),
        )?;
        let decoded = codec
            .decode(encoded, &chunk_representation, &CodecOptions::default())
            .unwrap();
        assert_eq!(bytes, decoded);
        Ok(())
    }

    #[test]
    fn codec_bytes_round_trip_f32() {
        for endianness in [Endianness::Big, Endianness::Little] {
            codec_bytes_round_trip_impl(
                Some(endianness),
                DataType::Float32,
                FillValue::from(0.0f32),
            )
            .unwrap();
        }
    }

    #[test]
    fn codec_bytes_round_trip_u32() {
        for endianness in [Endianness::Big, Endianness::Little] {
            codec_bytes_round_trip_impl(Some(endianness), DataType::UInt32, FillValue::from(0u32))
                .unwrap();
        }
    }

    #[test]
    fn codec_bytes_round_trip_u16() {
        for endianness in [Endianness::Big, Endianness::Little] {
            codec_bytes_round_trip_impl(Some(endianness), DataType::UInt16, FillValue::from(0u16))
                .unwrap();
        }
    }

    #[test]
    fn codec_bytes_round_trip_u8() {
        // Single-byte data types also round trip without an explicit endianness.
        for endianness in [Some(Endianness::Big), Some(Endianness::Little), None] {
            codec_bytes_round_trip_impl(endianness, DataType::UInt8, FillValue::from(0u8))
                .unwrap();
        }
    }

    #[test]
    fn codec_bytes_round_trip_i32() {
        for endianness in [Endianness::Big, Endianness::Little] {
            codec_bytes_round_trip_impl(Some(endianness), DataType::Int32, FillValue::from(0i32))
                .unwrap();
        }
    }

    #[test]
    fn codec_bytes_round_trip_i32_endianness_none() {
        // A multi-byte data type must be rejected when no endianness is configured.
        assert!(
            codec_bytes_round_trip_impl(None, DataType::Int32, FillValue::from(0i32)).is_err()
        );
    }

    #[test]
    fn codec_bytes_round_trip_complex64() {
        for endianness in [Endianness::Big, Endianness::Little] {
            codec_bytes_round_trip_impl(
                Some(endianness),
                DataType::Complex64,
                FillValue::from(num::complex::Complex32::new(0.0, 0.0)),
            )
            .unwrap();
        }
    }

    #[test]
    fn codec_bytes_round_trip_complex128() {
        for endianness in [Endianness::Big, Endianness::Little] {
            codec_bytes_round_trip_impl(
                Some(endianness),
                DataType::Complex128,
                FillValue::from(num::complex::Complex64::new(0.0, 0.0)),
            )
            .unwrap();
        }
    }

    #[test]
    fn codec_bytes_partial_decode() {
        let chunk_shape: ChunkShape = vec![4, 4].try_into().unwrap();
        let chunk_representation =
            ChunkRepresentation::new(chunk_shape.to_vec(), DataType::UInt8, FillValue::from(0u8))
                .unwrap();
        // Checked conversion: the element values below are only distinct if the count fits a `u8`.
        let num_elements = u8::try_from(chunk_representation.num_elements()).unwrap();
        let elements: Vec<u8> = (0..num_elements).collect();
        let bytes: ArrayBytes = elements.into();

        let codec = Arc::new(BytesCodec::new(None));

        let encoded = codec
            .encode(
                bytes.clone(),
                &chunk_representation,
                &CodecOptions::default(),
            )
            .unwrap();
        let decoded_regions = [ArraySubset::new_with_ranges(&[1..3, 0..1])];
        let input_handle = Arc::new(std::io::Cursor::new(encoded));
        let partial_decoder = codec
            .partial_decoder(
                input_handle,
                &chunk_representation,
                &CodecOptions::default(),
            )
            .unwrap();
        let decoded_partial_chunk = partial_decoder
            .partial_decode(&decoded_regions, &CodecOptions::default())
            .unwrap();

        // The elements are `u8`, so the decoded bytes are the decoded elements.
        let decoded_partial_chunk: Vec<u8> = decoded_partial_chunk
            .into_iter()
            .flat_map(|bytes| bytes.into_fixed().unwrap().to_vec())
            .collect();
        let answer: Vec<u8> = vec![4, 8];
        assert_eq!(answer, decoded_partial_chunk);
    }

    #[cfg(feature = "async")]
    #[tokio::test]
    async fn codec_bytes_async_partial_decode() {
        let chunk_shape: ChunkShape = vec![4, 4].try_into().unwrap();
        let chunk_representation =
            ChunkRepresentation::new(chunk_shape.to_vec(), DataType::UInt8, FillValue::from(0u8))
                .unwrap();
        // Checked conversion: the element values below are only distinct if the count fits a `u8`.
        let num_elements = u8::try_from(chunk_representation.num_elements()).unwrap();
        let elements: Vec<u8> = (0..num_elements).collect();
        let bytes: ArrayBytes = elements.into();

        let codec = Arc::new(BytesCodec::new(None));

        let encoded = codec
            .encode(
                bytes.clone(),
                &chunk_representation,
                &CodecOptions::default(),
            )
            .unwrap();
        let decoded_regions = [ArraySubset::new_with_ranges(&[1..3, 0..1])];
        let input_handle = Arc::new(std::io::Cursor::new(encoded));
        let partial_decoder = codec
            .async_partial_decoder(
                input_handle,
                &chunk_representation,
                &CodecOptions::default(),
            )
            .await
            .unwrap();
        let decoded_partial_chunk = partial_decoder
            .partial_decode(&decoded_regions, &CodecOptions::default())
            .await
            .unwrap();

        // The elements are `u8`, so the decoded bytes are the decoded elements.
        let decoded_partial_chunk: Vec<u8> = decoded_partial_chunk
            .into_iter()
            .flat_map(|bytes| bytes.into_fixed().unwrap().to_vec())
            .collect();
        let answer: Vec<u8> = vec![4, 8];
        assert_eq!(answer, decoded_partial_chunk);
    }
}