bc_components/
compressed.rs

1use std::{borrow::Cow, fmt::Formatter};
2
3use bc_crypto::hash::crc32;
4use bc_ur::prelude::*;
5use miniz_oxide::{deflate::compress_to_vec, inflate::decompress_to_vec};
6
7use crate::{DigestProvider, Error, Result, digest::Digest, tags};
8
9/// A compressed binary object with integrity verification.
10///
11/// `Compressed` provides a way to efficiently store and transmit binary data
12/// using the DEFLATE compression algorithm. It includes built-in integrity
13/// verification through a CRC32 checksum and optional cryptographic digest.
14///
15/// The compression is implemented using the raw DEFLATE format as described in
16/// [IETF RFC 1951](https://www.ietf.org/rfc/rfc1951.txt) with the following
17/// configuration equivalent to:
18///
19/// `deflateInit2(zstream, 5, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY)`
20///
21/// Features:
22/// - Automatic compression with configurable compression level
23/// - Integrity verification via CRC32 checksum
24/// - Optional cryptographic digest for content identification
25/// - Smart behavior for small data (stores decompressed if compression would
26///   increase size)
27/// - CBOR serialization/deserialization support
28#[derive(Clone, Eq, PartialEq)]
29pub struct Compressed {
30    /// CRC32 checksum of the decompressed data for integrity verification
31    checksum: u32,
32    /// Size of the original decompressed data in bytes
33    decompressed_size: usize,
34    /// The compressed data (or original data if compression is ineffective)
35    compressed_data: Vec<u8>,
36    /// Optional cryptographic digest of the content
37    digest: Option<Digest>,
38}
39
40impl Compressed {
41    /// Creates a new `Compressed` object with the specified parameters.
42    ///
43    /// This is a low-level constructor that allows direct creation of a
44    /// `Compressed` object without performing compression. It's primarily
45    /// intended for deserialization or when working with pre-compressed
46    /// data.
47    ///
48    /// # Parameters
49    ///
50    /// * `checksum` - CRC32 checksum of the decompressed data
51    /// * `decompressed_size` - Size of the original decompressed data in bytes
52    /// * `compressed_data` - The compressed data bytes
53    /// * `digest` - Optional cryptographic digest of the content
54    ///
55    /// # Returns
56    ///
57    /// A `Result` containing the new `Compressed` object if successful,
58    /// or an error if the parameters are invalid.
59    ///
60    /// # Errors
61    ///
62    /// Returns an error if the compressed data is larger than the decompressed
63    /// size, which would indicate a logical inconsistency.
64    ///
65    /// # Example
66    ///
67    /// ```
68    /// use bc_components::Compressed;
69    /// use bc_crypto::hash::crc32;
70    ///
71    /// let data = b"hello world";
72    /// let checksum = crc32(data);
73    /// let decompressed_size = data.len();
74    ///
75    /// // In a real scenario, this would be actually compressed data
76    /// let compressed_data = data.to_vec();
77    ///
78    /// let compressed =
79    ///     Compressed::new(checksum, decompressed_size, compressed_data, None)
80    ///         .unwrap();
81    /// ```
82    pub fn new(
83        checksum: u32,
84        decompressed_size: usize,
85        compressed_data: Vec<u8>,
86        digest: Option<Digest>,
87    ) -> Result<Self> {
88        if compressed_data.len() > decompressed_size {
89            return Err(Error::compression(
90                "compressed data is larger than decompressed size",
91            ));
92        }
93        Ok(Self {
94            checksum,
95            decompressed_size,
96            compressed_data,
97            digest,
98        })
99    }
100
101    /// Creates a new `Compressed` object by compressing the provided data.
102    ///
103    /// This is the primary method for creating compressed data. It
104    /// automatically handles compression using the DEFLATE algorithm with a
105    /// compression level of 6.
106    ///
107    /// If the compressed data would be larger than the original data (which can
108    /// happen with small or already compressed inputs), the original data
109    /// is stored instead.
110    ///
111    /// # Parameters
112    ///
113    /// * `decompressed_data` - The original data to compress
114    /// * `digest` - Optional cryptographic digest of the content
115    ///
116    /// # Returns
117    ///
118    /// A new `Compressed` object containing the compressed (or original) data.
119    ///
120    /// # Example
121    ///
122    /// ```
123    /// use bc_components::Compressed;
124    ///
125    /// // Compress a string
126    /// let data = "This is a longer string that should compress well with repeated patterns. \
127    ///            This is a longer string that should compress well with repeated patterns.";
128    /// let compressed = Compressed::from_decompressed_data(data.as_bytes(), None);
129    ///
130    /// // The compressed size should be smaller than the original
131    /// assert!(compressed.compressed_size() < data.len());
132    ///
133    /// // We can recover the original data
134    /// let decompressed = compressed.decompress().unwrap();
135    /// assert_eq!(decompressed, data.as_bytes());
136    /// ```
137    pub fn from_decompressed_data(
138        decompressed_data: impl AsRef<[u8]>,
139        digest: Option<Digest>,
140    ) -> Self {
141        let decompressed_data = decompressed_data.as_ref();
142        let compressed_data = compress_to_vec(decompressed_data, 6);
143        let checksum = crc32(decompressed_data);
144        let decompressed_size = decompressed_data.len();
145        let compressed_size = compressed_data.len();
146        if compressed_size != 0 && compressed_size < decompressed_size {
147            Self {
148                checksum,
149                decompressed_size,
150                compressed_data,
151                digest,
152            }
153        } else {
154            Self {
155                checksum,
156                decompressed_size,
157                compressed_data: decompressed_data.to_vec(),
158                digest,
159            }
160        }
161    }
162
163    /// Decompresses and returns the original decompressed data.
164    ///
165    /// This method performs the reverse of the compression process, restoring
166    /// the original data. It also verifies the integrity of the data using the
167    /// stored checksum.
168    ///
169    /// # Returns
170    ///
171    /// A `Result` containing the decompressed data if successful,
172    /// or an error if decompression fails or the checksum doesn't match.
173    ///
174    /// # Errors
175    ///
176    /// Returns an error if:
177    /// - The compressed data is corrupt and cannot be decompressed
178    /// - The checksum of the decompressed data doesn't match the stored
179    ///   checksum
180    ///
181    /// # Example
182    ///
183    /// ```
184    /// use bc_components::Compressed;
185    ///
186    /// // Original data
187    /// let original = b"This is some example data to compress";
188    ///
189    /// // Compress it
190    /// let compressed = Compressed::from_decompressed_data(original, None);
191    ///
192    /// // Deompress to get the original data back
193    /// let decompressed = compressed.decompress().unwrap();
194    /// assert_eq!(decompressed, original);
195    /// ```
196    pub fn decompress(&self) -> Result<Vec<u8>> {
197        let compressed_size = self.compressed_data.len();
198        if compressed_size >= self.decompressed_size {
199            return Ok(self.compressed_data.clone());
200        }
201
202        let decompressed_data = decompress_to_vec(&self.compressed_data)
203            .map_err(|_| Error::compression("corrupt compressed data"))?;
204        if crc32(&decompressed_data) != self.checksum {
205            return Err(Error::compression(
206                "compressed data checksum mismatch",
207            ));
208        }
209
210        Ok(decompressed_data)
211    }
212
213    /// Returns the size of the compressed data in bytes.
214    ///
215    /// # Returns
216    ///
217    /// The size of the compressed data in bytes.
218    ///
219    /// # Example
220    ///
221    /// ```
222    /// use bc_components::Compressed;
223    ///
224    /// let data = b"Hello world!";
225    /// let compressed = Compressed::from_decompressed_data(data, None);
226    ///
227    /// // For small inputs like this, compression might not be effective
228    /// // so the compressed_size might equal the original size
229    /// println!("Compressed size: {}", compressed.compressed_size());
230    /// ```
231    pub fn compressed_size(&self) -> usize {
232        self.compressed_data.len()
233    }
234
235    /// Returns the compression ratio of the data.
236    ///
237    /// The compression ratio is calculated as (compressed size) / (decompressed
238    /// size), so lower values indicate better compression.
239    ///
240    /// # Returns
241    ///
242    /// A floating-point value representing the compression ratio.
243    /// - Values less than 1.0 indicate effective compression
244    /// - Values equal to 1.0 indicate no compression was applied
245    /// - Values of NaN can occur if the decompressed size is zero
246    ///
247    /// # Example
248    ///
249    /// ```
250    /// use bc_components::Compressed;
251    ///
252    /// // A string with a lot of repetition should compress well
253    /// let data = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
254    /// let compressed = Compressed::from_decompressed_data(data.as_bytes(), None);
255    ///
256    /// // Should have a good compression ratio (much less than 1.0)
257    /// let ratio = compressed.compression_ratio();
258    /// assert!(ratio < 0.5);
259    /// ```
260    pub fn compression_ratio(&self) -> f64 {
261        (self.compressed_size() as f64) / (self.decompressed_size as f64)
262    }
263
264    /// Returns a reference to the digest of the compressed data, if available.
265    ///
266    /// # Returns
267    ///
268    /// An optional reference to the `Digest` associated with this compressed
269    /// data.
270    ///
271    /// # Example
272    ///
273    /// ```
274    /// use bc_components::{Compressed, Digest};
275    ///
276    /// let data = b"Hello world!";
277    /// let digest = Digest::from_image(data);
278    /// let compressed =
279    ///     Compressed::from_decompressed_data(data, Some(digest.clone()));
280    ///
281    /// // We can retrieve the digest we associated with the compressed data
282    /// assert_eq!(compressed.digest_ref_opt(), Some(&digest));
283    /// ```
284    pub fn digest_ref_opt(&self) -> Option<&Digest> {
285        self.digest.as_ref()
286    }
287
288    /// Returns whether this compressed data has an associated digest.
289    ///
290    /// # Returns
291    ///
292    /// `true` if this compressed data has a digest, `false` otherwise.
293    ///
294    /// # Example
295    ///
296    /// ```
297    /// use bc_components::{Compressed, Digest};
298    ///
299    /// // Create compressed data without a digest
300    /// let compressed1 = Compressed::from_decompressed_data(b"Hello", None);
301    /// assert!(!compressed1.has_digest());
302    ///
303    /// // Create compressed data with a digest
304    /// let digest = Digest::from_image(b"Hello");
305    /// let compressed2 =
306    ///     Compressed::from_decompressed_data(b"Hello", Some(digest));
307    /// assert!(compressed2.has_digest());
308    /// ```
309    pub fn has_digest(&self) -> bool {
310        self.digest.is_some()
311    }
312}
313
314/// Implementation of the `DigestProvider` trait for `Compressed`.
315///
316/// Allows `Compressed` objects with digests to be used with APIs that accept
317/// `DigestProvider` implementations.
318impl DigestProvider for Compressed {
319    /// Returns the cryptographic digest associated with this compressed data.
320    ///
321    /// # Returns
322    ///
323    /// A `Cow<'_, Digest>` containing the digest.
324    ///
325    /// # Panics
326    ///
327    /// Panics if there is no digest associated with this compressed data.
328    /// Use `has_digest()` or `digest_ref_opt()` to check before calling this
329    /// method.
330    fn digest(&self) -> Cow<'_, Digest> {
331        Cow::Owned(self.digest.as_ref().unwrap().clone())
332    }
333}
334
335/// Implementation of the `Debug` trait for `Compressed`.
336///
337/// Provides a human-readable debug representation of a `Compressed` object
338/// showing its key properties: checksum, sizes, compression ratio, and digest.
339impl std::fmt::Debug for Compressed {
340    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
341        write!(
342            f,
343            "Compressed(checksum: {}, size: {}/{}, ratio: {:.2}, digest: {})",
344            hex::encode(self.checksum.to_be_bytes()),
345            self.compressed_size(),
346            self.decompressed_size,
347            self.compression_ratio(),
348            self.digest_ref_opt()
349                .map(|d| d.short_description())
350                .unwrap_or_else(|| "None".to_string())
351        )
352    }
353}
354
355/// Implementation of `AsRef<Compressed>` for `Compressed`.
356///
357/// This allows passing a `Compressed` instance to functions that take
358/// `AsRef<Compressed>` parameters.
359impl AsRef<Compressed> for Compressed {
360    fn as_ref(&self) -> &Compressed {
361        self
362    }
363}
364
365/// Implementation of the `CBORTagged` trait for `Compressed`.
366///
367/// Defines the CBOR tag(s) used when serializing a `Compressed` object.
368impl CBORTagged for Compressed {
369    fn cbor_tags() -> Vec<Tag> {
370        tags_for_values(&[tags::TAG_COMPRESSED])
371    }
372}
373
374/// Conversion from `Compressed` to CBOR for serialization.
375impl From<Compressed> for CBOR {
376    fn from(value: Compressed) -> Self {
377        value.tagged_cbor()
378    }
379}
380
381/// Implementation of CBOR encoding for `Compressed`.
382///
383/// Defines how a `Compressed` object is serialized to untagged CBOR.
384/// The format is:
385/// ```text
386/// [
387///   checksum: uint,
388///   decompressed_size: uint,
389///   compressed_data: bytes,
390///   digest?: Digest  // Optional
391/// ]
392/// ```
393impl CBORTaggedEncodable for Compressed {
394    fn untagged_cbor(&self) -> CBOR {
395        let mut elements = vec![
396            self.checksum.into(),
397            self.decompressed_size.into(),
398            CBOR::to_byte_string(&self.compressed_data),
399        ];
400        if let Some(digest) = self.digest.clone() {
401            elements.push(digest.into());
402        }
403        CBORCase::Array(elements).into()
404    }
405}
406
407/// Conversion from CBOR to `Compressed` for deserialization.
408impl TryFrom<CBOR> for Compressed {
409    type Error = dcbor::Error;
410
411    fn try_from(cbor: CBOR) -> dcbor::Result<Self> {
412        Self::from_tagged_cbor(cbor)
413    }
414}
415
416/// Implementation of CBOR decoding for `Compressed`.
417///
418/// Defines how to create a `Compressed` object from untagged CBOR.
419impl CBORTaggedDecodable for Compressed {
420    fn from_untagged_cbor(cbor: CBOR) -> dcbor::Result<Self> {
421        let elements = cbor.try_into_array()?;
422        if elements.len() < 3 || elements.len() > 4 {
423            return Err("invalid number of elements in compressed".into());
424        }
425        let checksum = elements[0].clone().try_into()?;
426        let decompressed_size = elements[1].clone().try_into()?;
427        let compressed_data = elements[2].clone().try_into_byte_string()?;
428        let digest = if elements.len() == 4 {
429            Some(elements[3].clone().try_into()?)
430        } else {
431            None
432        };
433        Ok(Self::new(
434            checksum,
435            decompressed_size,
436            compressed_data,
437            digest,
438        )?)
439    }
440}
441
#[cfg(test)]
mod tests {
    use crate::Compressed;

    /// Compresses `source`, checks the `Debug` rendering against
    /// `expected_debug`, and checks that decompression restores `source`.
    fn assert_round_trip(source: &[u8], expected_debug: &str) {
        let compressed = Compressed::from_decompressed_data(source, None);
        assert_eq!(format!("{:?}", compressed), expected_debug);
        assert_eq!(compressed.decompress().unwrap(), source);
    }

    /// Long input: DEFLATE output is smaller and is what gets stored.
    #[test]
    fn test_1() {
        assert_round_trip(
            b"Lorem ipsum dolor sit amet consectetur adipiscing elit mi nibh ornare proin blandit diam ridiculus, faucibus mus dui eu vehicula nam donec dictumst sed vivamus bibendum aliquet efficitur. Felis imperdiet sodales dictum morbi vivamus augue dis duis aliquet velit ullamcorper porttitor, lobortis dapibus hac purus aliquam natoque iaculis blandit montes nunc pretium.",
            "Compressed(checksum: 3eeb10a0, size: 217/364, ratio: 0.60, digest: None)",
        );
    }

    /// Short input that still compresses slightly.
    #[test]
    fn test_2() {
        assert_round_trip(
            b"Lorem ipsum dolor sit amet consectetur adipiscing",
            "Compressed(checksum: 29db1793, size: 45/49, ratio: 0.92, digest: None)",
        );
    }

    /// Tiny input: stored as-is, so both sizes are equal.
    #[test]
    fn test_3() {
        assert_round_trip(
            b"Lorem",
            "Compressed(checksum: 44989b39, size: 5/5, ratio: 1.00, digest: None)",
        );
    }

    /// Empty input: zero sizes and a NaN ratio.
    #[test]
    fn test_4() {
        assert_round_trip(
            b"",
            "Compressed(checksum: 00000000, size: 0/0, ratio: NaN, digest: None)",
        );
    }
}
490}