mod bytes_codec;
mod bytes_codec_partial;
use std::sync::Arc;
pub use bytes_codec::BytesCodec;
pub(crate) use bytes_codec_partial::BytesCodecPartial;
use zarrs_metadata::v3::MetadataV3;
use crate::array::DataType;
use zarrs_codec::{Codec, CodecPluginV3, CodecTraitsV3};
pub use zarrs_metadata_ext::codec::bytes::{BytesCodecConfiguration, BytesCodecConfigurationV1};
use zarrs_plugin::PluginCreateError;
pub use zarrs_data_type::codec_traits::bytes::{
BytesCodecEndiannessMissingError, BytesDataTypeExt, BytesDataTypePlugin, BytesDataTypeTraits,
impl_bytes_data_type_traits,
};
pub use zarrs_metadata::Endianness;
// Declare the Zarr V3 extension names for `BytesCodec`: "bytes", with "endian" in the alias list.
// NOTE(review): presumably "bytes" is the canonical name and "endian" an accepted alternative;
// confirm against the `zarrs_plugin::impl_extension_aliases!` documentation.
zarrs_plugin::impl_extension_aliases!(BytesCodec,
v3: "bytes", ["endian"]
);
// Submit a `CodecPluginV3` built for `BytesCodec` to the `inventory` crate's registry.
inventory::submit! {
CodecPluginV3::new::<BytesCodec>()
}
impl CodecTraitsV3 for BytesCodec {
    /// Build a [`BytesCodec`] from Zarr V3 `metadata` and return it as an array-to-bytes
    /// [`Codec`].
    ///
    /// # Errors
    /// Propagates a failure to read the metadata configuration as a
    /// [`BytesCodecConfiguration`], or a failure to construct the codec from it.
    fn create(metadata: &MetadataV3) -> Result<Codec, PluginCreateError> {
        // The "binary" name only triggers a deprecation warning; creation carries on regardless.
        // NOTE(review): "binary" is not in the alias list declared for `BytesCodec` in this
        // file; confirm this branch is still reachable.
        let uses_deprecated_name = metadata.name() == "binary";
        if uses_deprecated_name {
            crate::warn_deprecated_extension("binary", "codec", Some("bytes"));
        }

        let config: BytesCodecConfiguration = metadata.to_typed_configuration()?;
        let bytes_codec = BytesCodec::new_with_configuration(&config)?;
        Ok(Codec::ArrayToBytes(Arc::new(bytes_codec)))
    }
}
/// Reverse the byte order of every element stored in `v`, in place.
///
/// `v` is treated as a sequence of elements that are each `data_type.fixed_size()` bytes long.
/// Trailing bytes that do not form a whole element are left untouched.
///
/// Elements of zero or one byte have no byte order, so `v` is left unchanged for those sizes.
///
/// # Panics
/// Panics if `data_type` does not have a fixed size.
pub(crate) fn reverse_endianness(v: &mut [u8], data_type: &DataType) {
    let Some(size) = data_type.fixed_size() else {
        unreachable!("reverse_endianness requires a data type with a fixed size")
    };
    match size {
        // Nothing to swap. Zero is handled here explicitly because `chunks_exact_mut` in the
        // fallback arm panics when given a chunk size of zero.
        0 | 1 => {}
        // Common power-of-two sizes go through fixed-size arrays and integer byte swaps.
        2 => {
            for chunk in v.as_chunks_mut::<2>().0 {
                let bytes = u16::from_ne_bytes(*chunk);
                *chunk = bytes.swap_bytes().to_ne_bytes();
            }
        }
        4 => {
            for chunk in v.as_chunks_mut::<4>().0 {
                let bytes = u32::from_ne_bytes(*chunk);
                *chunk = bytes.swap_bytes().to_ne_bytes();
            }
        }
        8 => {
            for chunk in v.as_chunks_mut::<8>().0 {
                let bytes = u64::from_ne_bytes(*chunk);
                *chunk = bytes.swap_bytes().to_ne_bytes();
            }
        }
        // Any other element size: reverse each element's bytes directly.
        _ => {
            for chunk in v.chunks_exact_mut(size) {
                chunk.reverse();
            }
        }
    }
}
#[cfg(test)]
mod tests {
use std::num::NonZeroU64;
use std::sync::Arc;
use super::*;
use crate::array::{
ArrayBytes, ArraySubset, ChunkShape, ChunkShapeTraits, Endianness, FillValue, data_type,
};
use zarrs_codec::{
ArrayToBytesCodecTraits, BytesPartialDecoderTraits, CodecMetadataOptions, CodecOptions,
CodecTraits,
};
#[test]
fn codec_bytes_configuration_big() {
    // A big-endian configuration must serialise back to the JSON it was parsed from.
    let json = r#"{"endian":"big"}"#;
    let parsed: BytesCodecConfiguration = serde_json::from_str(json).unwrap();
    let codec = BytesCodec::new_with_configuration(&parsed).unwrap();
    let metadata_options = CodecMetadataOptions::default();
    let emitted = codec.configuration_v3(&metadata_options).unwrap();
    assert_eq!(serde_json::to_string(&emitted).unwrap(), json);
}
#[test]
fn codec_bytes_configuration_little() {
    // A little-endian configuration must serialise back to the JSON it was parsed from.
    let json = r#"{"endian":"little"}"#;
    let parsed: BytesCodecConfiguration = serde_json::from_str(json).unwrap();
    let codec = BytesCodec::new_with_configuration(&parsed).unwrap();
    let metadata_options = CodecMetadataOptions::default();
    let emitted = codec.configuration_v3(&metadata_options).unwrap();
    assert_eq!(serde_json::to_string(&emitted).unwrap(), json);
}
#[test]
fn codec_bytes_configuration_none() {
    // An empty configuration (no "endian" key) must serialise back to an empty JSON object.
    let json = r"{}";
    let parsed: BytesCodecConfiguration = serde_json::from_str(json).unwrap();
    let codec = BytesCodec::new_with_configuration(&parsed).unwrap();
    let metadata_options = CodecMetadataOptions::default();
    let emitted = codec.configuration_v3(&metadata_options).unwrap();
    assert_eq!(serde_json::to_string(&emitted).unwrap(), json);
}
/// Encode a deterministic 10x10 chunk of `data_type` with a [`BytesCodec`] configured with
/// `endianness`, decode it again, and assert that the decoded bytes equal the input.
///
/// An encode failure is returned as `Err`; a decode failure panics.
fn codec_bytes_round_trip_impl(
    endianness: Option<Endianness>,
    data_type: DataType,
    fill_value: impl Into<FillValue>,
) -> Result<(), Box<dyn std::error::Error>> {
    let chunk_shape = ChunkShape::from(vec![NonZeroU64::new(10).unwrap(); 2]);
    let fill_value: FillValue = fill_value.into();
    let options = CodecOptions::default();

    // Input bytes follow the pattern 0, 1, 2, ... (truncated to u8) across the whole chunk.
    let element_count = chunk_shape.num_elements_u64() as usize;
    let total_bytes = element_count * data_type.fixed_size().unwrap();
    let pattern: Vec<u8> = (0..total_bytes).map(|index| index as u8).collect();
    let bytes: ArrayBytes = pattern.into();

    let codec = BytesCodec::new(endianness);
    let encoded = codec.encode(
        bytes.clone(),
        &chunk_shape,
        &data_type,
        &fill_value,
        &options,
    )?;
    let decoded = codec
        .decode(encoded, &chunk_shape, &data_type, &fill_value, &options)
        .unwrap();
    assert_eq!(bytes, decoded);
    Ok(())
}
#[test]
fn codec_bytes_round_trip_f32() {
    // float32 chunks must round-trip under both explicit byte orders.
    for endianness in [Endianness::Big, Endianness::Little] {
        codec_bytes_round_trip_impl(Some(endianness), data_type::float32(), 0.0f32).unwrap();
    }
}
#[test]
fn codec_bytes_round_trip_u32() {
    // uint32 chunks must round-trip under both explicit byte orders.
    for endianness in [Endianness::Big, Endianness::Little] {
        codec_bytes_round_trip_impl(Some(endianness), data_type::uint32(), 0u32).unwrap();
    }
}
#[test]
fn codec_bytes_round_trip_u16() {
    // uint16 chunks must round-trip under both explicit byte orders.
    for endianness in [Endianness::Big, Endianness::Little] {
        codec_bytes_round_trip_impl(Some(endianness), data_type::uint16(), 0u16).unwrap();
    }
}
#[test]
fn codec_bytes_round_trip_u8() {
    // uint8 chunks must round-trip with either byte order and with no byte order at all.
    let endianness_options = [Some(Endianness::Big), Some(Endianness::Little), None];
    for endianness in endianness_options {
        codec_bytes_round_trip_impl(endianness, data_type::uint8(), 0u8).unwrap();
    }
}
#[test]
fn codec_bytes_round_trip_i32() {
    // int32 chunks must round-trip under both explicit byte orders.
    for endianness in [Endianness::Big, Endianness::Little] {
        codec_bytes_round_trip_impl(Some(endianness), data_type::int32(), 0).unwrap();
    }
}
#[test]
fn codec_bytes_round_trip_i32_endianness_none() {
    // Without an endianness, the int32 round trip is expected to report an error.
    let outcome = codec_bytes_round_trip_impl(None, data_type::int32(), 0);
    assert!(outcome.is_err());
}
#[test]
fn codec_bytes_round_trip_complex64() {
    // complex64 chunks must round-trip under both explicit byte orders.
    for endianness in [Endianness::Big, Endianness::Little] {
        let zero = num::complex::Complex32::new(0.0, 0.0);
        codec_bytes_round_trip_impl(Some(endianness), data_type::complex64(), zero).unwrap();
    }
}
#[test]
fn codec_bytes_round_trip_complex128() {
    // complex128 chunks must round-trip under both explicit byte orders.
    for endianness in [Endianness::Big, Endianness::Little] {
        let zero = num::complex::Complex64::new(0.0, 0.0);
        codec_bytes_round_trip_impl(Some(endianness), data_type::complex128(), zero).unwrap();
    }
}
#[test]
fn codec_bytes_partial_decode() {
    // A 4x4 chunk of u8 holding the values 0..16, encoded without an endianness.
    let chunk_shape: ChunkShape = vec![NonZeroU64::new(4).unwrap(); 2];
    let data_type = data_type::uint8();
    let fill_value = FillValue::from(0u8);
    let options = CodecOptions::default();
    let values: Vec<u8> = (0..chunk_shape.num_elements_u64() as u8).collect();
    let bytes: ArrayBytes = values.into();

    let codec = Arc::new(BytesCodec::new(None));
    let encoded = codec
        .encode(
            bytes.clone(),
            &chunk_shape,
            &data_type,
            &fill_value,
            &options,
        )
        .unwrap();

    let input_handle = Arc::new(encoded);
    let partial_decoder = codec
        .partial_decoder(
            input_handle.clone(),
            &chunk_shape,
            &data_type,
            &fill_value,
            &options,
        )
        .unwrap();
    // The partial decoder must report the same held size as its input handle.
    assert_eq!(partial_decoder.size_held(), input_handle.size_held());

    // Rows 1..3 of column 0 are expected to decode to the values 4 and 8.
    let region = ArraySubset::new_with_ranges(&[1..3, 0..1]);
    let decoded = partial_decoder.partial_decode(&region, &options).unwrap();
    let fixed = decoded.into_fixed().unwrap();
    let (single_bytes, _) = fixed.as_chunks::<1>();
    let decoded_values: Vec<u8> = single_bytes
        .iter()
        .map(|byte| u8::from_ne_bytes(*byte))
        .collect();

    let expected: Vec<u8> = vec![4, 8];
    assert_eq!(expected, decoded_values);
}
#[cfg(feature = "async")]
#[tokio::test]
async fn codec_bytes_async_partial_decode() {
    // A 4x4 chunk of u8 holding the values 0..16, encoded without an endianness.
    let chunk_shape: ChunkShape = vec![NonZeroU64::new(4).unwrap(); 2];
    let data_type = data_type::uint8();
    let fill_value = FillValue::from(0u8);
    let options = CodecOptions::default();
    let values: Vec<u8> = (0..chunk_shape.num_elements_u64() as u8).collect();
    let bytes: ArrayBytes = values.into();

    let codec = Arc::new(BytesCodec::new(None));
    let encoded = codec
        .encode(
            bytes.clone(),
            &chunk_shape,
            &data_type,
            &fill_value,
            &options,
        )
        .unwrap();

    let input_handle = Arc::new(encoded);
    let partial_decoder = codec
        .async_partial_decoder(
            input_handle,
            &chunk_shape,
            &data_type,
            &fill_value,
            &options,
        )
        .await
        .unwrap();

    // Rows 1..3 of column 0 are expected to decode to the values 4 and 8.
    let region = ArraySubset::new_with_ranges(&[1..3, 0..1]);
    let decoded = partial_decoder
        .partial_decode(&region, &options)
        .await
        .unwrap();
    let fixed = decoded.into_fixed().unwrap();
    let (single_bytes, _) = fixed.as_chunks::<1>();
    let decoded_values: Vec<u8> = single_bytes
        .iter()
        .map(|byte| u8::from_ne_bytes(*byte))
        .collect();

    let expected: Vec<u8> = vec![4, 8];
    assert_eq!(expected, decoded_values);
}
}