bc_components/compressed.rs
1use std::{borrow::Cow, fmt::Formatter};
2
3use anyhow::{Result, anyhow, bail};
4use bc_crypto::hash::crc32;
5use bc_ur::prelude::*;
6use miniz_oxide::{deflate::compress_to_vec, inflate::decompress_to_vec};
7
8use crate::{DigestProvider, digest::Digest, tags};
9
/// A compressed binary object with integrity verification.
///
/// `Compressed` stores binary data using the DEFLATE compression algorithm,
/// together with a CRC32 checksum of the original (uncompressed) bytes and an
/// optional cryptographic digest supplied by the caller.
///
/// The compression is implemented using the raw DEFLATE format as described in
/// [IETF RFC 1951](https://www.ietf.org/rfc/rfc1951.txt) with the following
/// configuration equivalent to:
///
/// `deflateInit2(zstream, 5, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY)`
///
/// NOTE(review): the level in the zlib-style invocation above is 5, while
/// `from_uncompressed_data` passes level 6 to `compress_to_vec` — confirm
/// which value is intended.
///
/// Features:
/// - Automatic compression at a fixed level when built with
///   `from_uncompressed_data`
/// - CRC32 checksum, checked by `uncompress` after inflating the data
/// - Optional cryptographic digest for content identification
/// - Smart behavior for small data (stores the original bytes if compression
///   would not make them smaller)
/// - CBOR serialization/deserialization support
#[derive(Clone, Eq, PartialEq)]
pub struct Compressed {
    /// CRC32 checksum of the uncompressed data for integrity verification
    checksum: u32,
    /// Size of the original uncompressed data in bytes
    uncompressed_size: usize,
    /// The DEFLATE-compressed data, or the original bytes verbatim when
    /// compression did not make them smaller
    compressed_data: Vec<u8>,
    /// Optional cryptographic digest of the content; `None` when the creator
    /// did not supply one
    digest: Option<Digest>,
}
40
41impl Compressed {
42 /// Creates a new `Compressed` object with the specified parameters.
43 ///
44 /// This is a low-level constructor that allows direct creation of a
45 /// `Compressed` object without performing compression. It's primarily
46 /// intended for deserialization or when working with pre-compressed
47 /// data.
48 ///
49 /// # Parameters
50 ///
51 /// * `checksum` - CRC32 checksum of the uncompressed data
52 /// * `uncompressed_size` - Size of the original uncompressed data in bytes
53 /// * `compressed_data` - The compressed data bytes
54 /// * `digest` - Optional cryptographic digest of the content
55 ///
56 /// # Returns
57 ///
58 /// A `Result` containing the new `Compressed` object if successful,
59 /// or an error if the parameters are invalid.
60 ///
61 /// # Errors
62 ///
63 /// Returns an error if the compressed data is larger than the uncompressed
64 /// size, which would indicate a logical inconsistency.
65 ///
66 /// # Example
67 ///
68 /// ```
69 /// use bc_components::Compressed;
70 /// use bc_crypto::hash::crc32;
71 ///
72 /// let data = b"hello world";
73 /// let checksum = crc32(data);
74 /// let uncompressed_size = data.len();
75 ///
76 /// // In a real scenario, this would be actually compressed data
77 /// let compressed_data = data.to_vec();
78 ///
79 /// let compressed =
80 /// Compressed::new(checksum, uncompressed_size, compressed_data, None)
81 /// .unwrap();
82 /// ```
83 pub fn new(
84 checksum: u32,
85 uncompressed_size: usize,
86 compressed_data: Vec<u8>,
87 digest: Option<Digest>,
88 ) -> Result<Self> {
89 if compressed_data.len() > uncompressed_size {
90 bail!("Compressed data is larger than uncompressed size");
91 }
92 Ok(Self {
93 checksum,
94 uncompressed_size,
95 compressed_data,
96 digest,
97 })
98 }
99
100 /// Creates a new `Compressed` object by compressing the provided data.
101 ///
102 /// This is the primary method for creating compressed data. It
103 /// automatically handles compression using the DEFLATE algorithm with a
104 /// compression level of 6.
105 ///
106 /// If the compressed data would be larger than the original data (which can
107 /// happen with small or already compressed inputs), the original data
108 /// is stored instead.
109 ///
110 /// # Parameters
111 ///
112 /// * `uncompressed_data` - The original data to compress
113 /// * `digest` - Optional cryptographic digest of the content
114 ///
115 /// # Returns
116 ///
117 /// A new `Compressed` object containing the compressed (or original) data.
118 ///
119 /// # Example
120 ///
121 /// ```
122 /// use bc_components::Compressed;
123 ///
124 /// // Compress a string
125 /// let data = "This is a longer string that should compress well with repeated patterns. \
126 /// This is a longer string that should compress well with repeated patterns.";
127 /// let compressed = Compressed::from_uncompressed_data(data.as_bytes(), None);
128 ///
129 /// // The compressed size should be smaller than the original
130 /// assert!(compressed.compressed_size() < data.len());
131 ///
132 /// // We can recover the original data
133 /// let uncompressed = compressed.uncompress().unwrap();
134 /// assert_eq!(uncompressed, data.as_bytes());
135 /// ```
136 pub fn from_uncompressed_data(
137 uncompressed_data: impl AsRef<[u8]>,
138 digest: Option<Digest>,
139 ) -> Self {
140 let uncompressed_data = uncompressed_data.as_ref();
141 let compressed_data = compress_to_vec(uncompressed_data, 6);
142 let checksum = crc32(uncompressed_data);
143 let uncompressed_size = uncompressed_data.len();
144 let compressed_size = compressed_data.len();
145 if compressed_size != 0 && compressed_size < uncompressed_size {
146 Self {
147 checksum,
148 uncompressed_size,
149 compressed_data,
150 digest,
151 }
152 } else {
153 Self {
154 checksum,
155 uncompressed_size,
156 compressed_data: uncompressed_data.to_vec(),
157 digest,
158 }
159 }
160 }
161
162 /// Decompresses and returns the original uncompressed data.
163 ///
164 /// This method performs the reverse of the compression process, restoring
165 /// the original data. It also verifies the integrity of the data using the
166 /// stored checksum.
167 ///
168 /// # Returns
169 ///
170 /// A `Result` containing the uncompressed data if successful,
171 /// or an error if decompression fails or the checksum doesn't match.
172 ///
173 /// # Errors
174 ///
175 /// Returns an error if:
176 /// - The compressed data is corrupt and cannot be decompressed
177 /// - The checksum of the decompressed data doesn't match the stored
178 /// checksum
179 ///
180 /// # Example
181 ///
182 /// ```
183 /// use bc_components::Compressed;
184 ///
185 /// // Original data
186 /// let original = b"This is some example data to compress";
187 ///
188 /// // Compress it
189 /// let compressed = Compressed::from_uncompressed_data(original, None);
190 ///
191 /// // Uncompress to get the original data back
192 /// let uncompressed = compressed.uncompress().unwrap();
193 /// assert_eq!(uncompressed, original);
194 /// ```
195 pub fn uncompress(&self) -> Result<Vec<u8>> {
196 let compressed_size = self.compressed_data.len();
197 if compressed_size >= self.uncompressed_size {
198 return Ok(self.compressed_data.clone());
199 }
200
201 let uncompressed_data = decompress_to_vec(&self.compressed_data)
202 .map_err(|_| anyhow!("corrupt compressed data"))?;
203 if crc32(&uncompressed_data) != self.checksum {
204 bail!("compressed data checksum mismatch");
205 }
206
207 Ok(uncompressed_data)
208 }
209
210 /// Returns the size of the compressed data in bytes.
211 ///
212 /// # Returns
213 ///
214 /// The size of the compressed data in bytes.
215 ///
216 /// # Example
217 ///
218 /// ```
219 /// use bc_components::Compressed;
220 ///
221 /// let data = b"Hello world!";
222 /// let compressed = Compressed::from_uncompressed_data(data, None);
223 ///
224 /// // For small inputs like this, compression might not be effective
225 /// // so the compressed_size might equal the original size
226 /// println!("Compressed size: {}", compressed.compressed_size());
227 /// ```
228 pub fn compressed_size(&self) -> usize { self.compressed_data.len() }
229
230 /// Returns the compression ratio of the data.
231 ///
232 /// The compression ratio is calculated as (compressed size) / (uncompressed
233 /// size), so lower values indicate better compression.
234 ///
235 /// # Returns
236 ///
237 /// A floating-point value representing the compression ratio.
238 /// - Values less than 1.0 indicate effective compression
239 /// - Values equal to 1.0 indicate no compression was applied
240 /// - Values of NaN can occur if the uncompressed size is zero
241 ///
242 /// # Example
243 ///
244 /// ```
245 /// use bc_components::Compressed;
246 ///
247 /// // A string with a lot of repetition should compress well
248 /// let data = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
249 /// let compressed = Compressed::from_uncompressed_data(data.as_bytes(), None);
250 ///
251 /// // Should have a good compression ratio (much less than 1.0)
252 /// let ratio = compressed.compression_ratio();
253 /// assert!(ratio < 0.5);
254 /// ```
255 pub fn compression_ratio(&self) -> f64 {
256 (self.compressed_size() as f64) / (self.uncompressed_size as f64)
257 }
258
259 /// Returns a reference to the digest of the compressed data, if available.
260 ///
261 /// # Returns
262 ///
263 /// An optional reference to the `Digest` associated with this compressed
264 /// data.
265 ///
266 /// # Example
267 ///
268 /// ```
269 /// use bc_components::{Compressed, Digest};
270 ///
271 /// let data = b"Hello world!";
272 /// let digest = Digest::from_image(data);
273 /// let compressed =
274 /// Compressed::from_uncompressed_data(data, Some(digest.clone()));
275 ///
276 /// // We can retrieve the digest we associated with the compressed data
277 /// assert_eq!(compressed.digest_ref_opt(), Some(&digest));
278 /// ```
279 pub fn digest_ref_opt(&self) -> Option<&Digest> { self.digest.as_ref() }
280
281 /// Returns whether this compressed data has an associated digest.
282 ///
283 /// # Returns
284 ///
285 /// `true` if this compressed data has a digest, `false` otherwise.
286 ///
287 /// # Example
288 ///
289 /// ```
290 /// use bc_components::{Compressed, Digest};
291 ///
292 /// // Create compressed data without a digest
293 /// let compressed1 = Compressed::from_uncompressed_data(b"Hello", None);
294 /// assert!(!compressed1.has_digest());
295 ///
296 /// // Create compressed data with a digest
297 /// let digest = Digest::from_image(b"Hello");
298 /// let compressed2 =
299 /// Compressed::from_uncompressed_data(b"Hello", Some(digest));
300 /// assert!(compressed2.has_digest());
301 /// ```
302 pub fn has_digest(&self) -> bool { self.digest.is_some() }
303}
304
305/// Implementation of the `DigestProvider` trait for `Compressed`.
306///
307/// Allows `Compressed` objects with digests to be used with APIs that accept
308/// `DigestProvider` implementations.
309impl DigestProvider for Compressed {
310 /// Returns the cryptographic digest associated with this compressed data.
311 ///
312 /// # Returns
313 ///
314 /// A `Cow<'_, Digest>` containing the digest.
315 ///
316 /// # Panics
317 ///
318 /// Panics if there is no digest associated with this compressed data.
319 /// Use `has_digest()` or `digest_ref_opt()` to check before calling this
320 /// method.
321 fn digest(&self) -> Cow<'_, Digest> {
322 Cow::Owned(self.digest.as_ref().unwrap().clone())
323 }
324}
325
326/// Implementation of the `Debug` trait for `Compressed`.
327///
328/// Provides a human-readable debug representation of a `Compressed` object
329/// showing its key properties: checksum, sizes, compression ratio, and digest.
330impl std::fmt::Debug for Compressed {
331 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
332 write!(
333 f,
334 "Compressed(checksum: {}, size: {}/{}, ratio: {:.2}, digest: {})",
335 hex::encode(self.checksum.to_be_bytes()),
336 self.compressed_size(),
337 self.uncompressed_size,
338 self.compression_ratio(),
339 self.digest_ref_opt()
340 .map(|d| d.short_description())
341 .unwrap_or_else(|| "None".to_string())
342 )
343 }
344}
345
346/// Implementation of `AsRef<Compressed>` for `Compressed`.
347///
348/// This allows passing a `Compressed` instance to functions that take
349/// `AsRef<Compressed>` parameters.
350impl AsRef<Compressed> for Compressed {
351 fn as_ref(&self) -> &Compressed { self }
352}
353
354/// Implementation of the `CBORTagged` trait for `Compressed`.
355///
356/// Defines the CBOR tag(s) used when serializing a `Compressed` object.
357impl CBORTagged for Compressed {
358 fn cbor_tags() -> Vec<Tag> { tags_for_values(&[tags::TAG_COMPRESSED]) }
359}
360
361/// Conversion from `Compressed` to CBOR for serialization.
362impl From<Compressed> for CBOR {
363 fn from(value: Compressed) -> Self { value.tagged_cbor() }
364}
365
366/// Implementation of CBOR encoding for `Compressed`.
367///
368/// Defines how a `Compressed` object is serialized to untagged CBOR.
369/// The format is:
370/// ```text
371/// [
372/// checksum: uint,
373/// uncompressed_size: uint,
374/// compressed_data: bytes,
375/// digest?: Digest // Optional
376/// ]
377/// ```
378impl CBORTaggedEncodable for Compressed {
379 fn untagged_cbor(&self) -> CBOR {
380 let mut elements = vec![
381 self.checksum.into(),
382 self.uncompressed_size.into(),
383 CBOR::to_byte_string(&self.compressed_data),
384 ];
385 if let Some(digest) = self.digest.clone() {
386 elements.push(digest.into());
387 }
388 CBORCase::Array(elements).into()
389 }
390}
391
392/// Conversion from CBOR to `Compressed` for deserialization.
393impl TryFrom<CBOR> for Compressed {
394 type Error = dcbor::Error;
395
396 fn try_from(cbor: CBOR) -> dcbor::Result<Self> {
397 Self::from_tagged_cbor(cbor)
398 }
399}
400
401/// Implementation of CBOR decoding for `Compressed`.
402///
403/// Defines how to create a `Compressed` object from untagged CBOR.
404impl CBORTaggedDecodable for Compressed {
405 fn from_untagged_cbor(cbor: CBOR) -> dcbor::Result<Self> {
406 let elements = cbor.try_into_array()?;
407 if elements.len() < 3 || elements.len() > 4 {
408 return Err("invalid number of elements in compressed".into());
409 }
410 let checksum = elements[0].clone().try_into()?;
411 let uncompressed_size = elements[1].clone().try_into()?;
412 let compressed_data = elements[2].clone().try_into_byte_string()?;
413 let digest = if elements.len() == 4 {
414 Some(elements[3].clone().try_into()?)
415 } else {
416 None
417 };
418 Ok(Self::new(
419 checksum,
420 uncompressed_size,
421 compressed_data,
422 digest,
423 )?)
424 }
425}
426
#[cfg(test)]
mod tests {
    use crate::Compressed;

    /// Compresses `source`, compares the `Debug` rendering with
    /// `expected_debug`, and checks that uncompressing yields `source` again.
    #[track_caller]
    fn assert_round_trip(source: &[u8], expected_debug: &str) {
        let compressed = Compressed::from_uncompressed_data(source, None);
        assert_eq!(format!("{:?}", compressed), expected_debug);
        assert_eq!(compressed.uncompress().unwrap(), source);
    }

    /// Long input: stored in deflated form.
    #[test]
    fn test_1() {
        assert_round_trip(
            b"Lorem ipsum dolor sit amet consectetur adipiscing elit mi nibh ornare proin blandit diam ridiculus, faucibus mus dui eu vehicula nam donec dictumst sed vivamus bibendum aliquet efficitur. Felis imperdiet sodales dictum morbi vivamus augue dis duis aliquet velit ullamcorper porttitor, lobortis dapibus hac purus aliquam natoque iaculis blandit montes nunc pretium.",
            "Compressed(checksum: 3eeb10a0, size: 217/364, ratio: 0.60, digest: None)",
        );
    }

    /// Short input that still shrinks slightly when deflated.
    #[test]
    fn test_2() {
        assert_round_trip(
            b"Lorem ipsum dolor sit amet consectetur adipiscing",
            "Compressed(checksum: 29db1793, size: 45/49, ratio: 0.92, digest: None)",
        );
    }

    /// Tiny input: stored without compression.
    #[test]
    fn test_3() {
        assert_round_trip(
            b"Lorem",
            "Compressed(checksum: 44989b39, size: 5/5, ratio: 1.00, digest: None)",
        );
    }

    /// Empty input: zero sizes and a NaN ratio.
    #[test]
    fn test_4() {
        assert_round_trip(
            b"",
            "Compressed(checksum: 00000000, size: 0/0, ratio: NaN, digest: None)",
        );
    }
}