Skip to main content

zarrs/array/codec/bytes_to_bytes/
crc32c.rs

//! The `crc32c` bytes-to-bytes codec (Core).
//!
//! Appends a CRC32C checksum of the input bytestream.
//!
//! ### Compatible Implementations
//! This is a core codec and should be compatible with all Zarr V3 implementations that support it.
//!
//! ### Specification
//! - <https://zarr-specs.readthedocs.io/en/latest/v3/codecs/crc32c/index.html>
//! - <https://github.com/zarr-developers/zarr-extensions/tree/main/codecs/crc32c>
//!
//! ### Codec `name` Aliases (Zarr V3)
//! - `crc32c`
//!
//! ### Codec `id` Aliases (Zarr V2)
//! - `crc32c`
//!
//! ### Codec `configuration` Example - [`Crc32cCodecConfiguration`]:
//! ```rust
//! # let JSON = r#"
//! {}
//! # "#;
//! # use zarrs::metadata_ext::codec::crc32c::Crc32cCodecConfiguration;
//! # serde_json::from_str::<Crc32cCodecConfiguration>(JSON).unwrap();
//! ```
26
27mod crc32c_codec;
28
29use std::sync::Arc;
30
31pub use crc32c_codec::Crc32cCodec;
32use zarrs_metadata::v2::MetadataV2;
33use zarrs_metadata::v3::MetadataV3;
34
35use zarrs_codec::Codec;
36pub use zarrs_metadata_ext::codec::crc32c::{
37    Crc32cCodecConfiguration, Crc32cCodecConfigurationNumcodecs, Crc32cCodecConfigurationV1,
38};
39use zarrs_plugin::PluginCreateError;
40
41zarrs_plugin::impl_extension_aliases!(Crc32cCodec, v3: "crc32c", v2: "crc32c");
42
43// Register the V3 codec.
44inventory::submit! {
45    zarrs_codec::CodecPluginV3::new::<Crc32cCodec>()
46}
47
48impl zarrs_codec::CodecTraitsV3 for Crc32cCodec {
49    fn create(metadata: &MetadataV3) -> Result<Codec, PluginCreateError> {
50        let configuration = if metadata.name() == "numcodecs.crc32c" {
51            Crc32cCodecConfiguration::Numcodecs(
52                metadata.to_typed_configuration::<Crc32cCodecConfigurationNumcodecs>()?,
53            )
54        } else {
55            Crc32cCodecConfiguration::V1(
56                metadata.to_typed_configuration::<Crc32cCodecConfigurationV1>()?,
57            )
58        };
59        let codec = Arc::new(Crc32cCodec::new_with_configuration(&configuration));
60        Ok(Codec::BytesToBytes(codec))
61    }
62}
63
64impl zarrs_codec::CodecTraitsV2 for Crc32cCodec {
65    fn create(metadata: &MetadataV2) -> Result<Codec, PluginCreateError> {
66        let configuration = Crc32cCodecConfiguration::Numcodecs(
67            metadata.to_typed_configuration::<Crc32cCodecConfigurationNumcodecs>()?,
68        );
69        let codec = Arc::new(Crc32cCodec::new_with_configuration(&configuration));
70        Ok(Codec::BytesToBytes(codec))
71    }
72}
73
/// Size in bytes of the checksum: the size of one `u32`.
const CHECKSUM_SIZE: usize = std::mem::size_of::<u32>();
75
#[cfg(test)]
mod tests {
    use std::borrow::Cow;
    use std::sync::Arc;

    use super::*;
    use crate::array::BytesRepresentation;
    use zarrs_codec::{
        BytesPartialDecoderTraits, BytesToBytesCodecTraits, CodecMetadataOptions, CodecOptions,
        CodecTraits,
    };
    use zarrs_storage::byte_range::ByteRange;

    /// The codec configuration used by every test in this module: an empty JSON object.
    const JSON1: &str = r"{}";

    /// Builds a [`Crc32cCodec`] from the [`JSON1`] configuration.
    fn codec_from_json1() -> Crc32cCodec {
        let configuration: Crc32cCodecConfiguration = serde_json::from_str(JSON1).unwrap();
        Crc32cCodec::new_with_configuration(&configuration)
    }

    /// Returns the bytes `0, 1, ..., len - 1`, used as the unencoded test input.
    fn sequential_bytes(len: u8) -> Vec<u8> {
        (0..len).collect()
    }

    /// The V3 configuration of a codec built from `{}` serialises back to `{}`.
    #[test]
    fn codec_crc32c_configuration_none() {
        let codec = codec_from_json1();
        let metadata = codec
            .configuration_v3(&CodecMetadataOptions::default())
            .unwrap();
        assert_eq!(serde_json::to_string(&metadata).unwrap(), JSON1);
    }

    /// Encoding then decoding returns the input, and the trailing checksum bytes match the
    /// expected value for the input `0..6`.
    #[test]
    fn codec_crc32c() {
        let bytes = sequential_bytes(6);
        let bytes_representation = BytesRepresentation::FixedSize(bytes.len() as u64);
        let codec = codec_from_json1();

        let encoded = codec
            .encode(Cow::Borrowed(&bytes), &CodecOptions::default())
            .unwrap();
        let decoded = codec
            .decode(
                encoded.clone(),
                &bytes_representation,
                &CodecOptions::default(),
            )
            .unwrap();
        assert_eq!(bytes, decoded.to_vec());

        // Check that the checksum (the last `CHECKSUM_SIZE` encoded bytes) is correct.
        let checksum: [u8; CHECKSUM_SIZE] = encoded[encoded.len() - CHECKSUM_SIZE..]
            .try_into()
            .unwrap();
        assert_eq!(checksum, [20, 133, 9, 65]);
    }

    /// Partially decoding bytes 3..5 of the encoded input `0..32` yields `[3, 4]`.
    #[test]
    fn codec_crc32c_partial_decode() {
        let bytes = sequential_bytes(32);
        let bytes_representation = BytesRepresentation::FixedSize(bytes.len() as u64);
        let codec = Arc::new(codec_from_json1());

        let encoded = codec
            .encode(Cow::Owned(bytes), &CodecOptions::default())
            .unwrap();
        let decoded_regions = [ByteRange::FromStart(3, Some(2))];
        let input_handle = Arc::new(encoded);
        let partial_decoder = codec
            .partial_decoder(
                input_handle.clone(),
                &bytes_representation,
                &CodecOptions::default(),
            )
            .unwrap();
        // crc32c partial decoder does not hold bytes
        assert_eq!(partial_decoder.size_held(), input_handle.size_held());

        let decoded_partial_chunk = partial_decoder
            .partial_decode_many(
                Box::new(decoded_regions.into_iter()),
                &CodecOptions::default(),
            )
            .unwrap()
            .unwrap();
        let expected: &[Vec<u8>] = &[vec![3, 4]];
        assert_eq!(
            expected,
            decoded_partial_chunk
                .into_iter()
                .map(|v| v.to_vec())
                .collect::<Vec<_>>()
        );
    }

    /// Async counterpart of `codec_crc32c_partial_decode`: partially decoding bytes 3..5 of the
    /// encoded input `0..32` yields `[3, 4]`.
    #[cfg(feature = "async")]
    #[tokio::test]
    async fn codec_crc32c_async_partial_decode() {
        let bytes = sequential_bytes(32);
        let bytes_representation = BytesRepresentation::FixedSize(bytes.len() as u64);
        let codec = Arc::new(codec_from_json1());

        let encoded = codec
            .encode(Cow::Owned(bytes), &CodecOptions::default())
            .unwrap();
        let decoded_regions = [ByteRange::FromStart(3, Some(2))];
        let input_handle = Arc::new(encoded);
        let partial_decoder = codec
            .async_partial_decoder(
                input_handle,
                &bytes_representation,
                &CodecOptions::default(),
            )
            .await
            .unwrap();

        let decoded_partial_chunk = partial_decoder
            .partial_decode_many(
                Box::new(decoded_regions.into_iter()),
                &CodecOptions::default(),
            )
            .await
            .unwrap()
            .unwrap();
        let expected: &[Vec<u8>] = &[vec![3, 4]];
        assert_eq!(
            expected,
            decoded_partial_chunk
                .into_iter()
                .map(|v| v.to_vec())
                .collect::<Vec<_>>()
        );
    }
}