bc_components/
compressed.rs

1use std::{ fmt::Formatter, borrow::Cow };
2use bc_ur::prelude::*;
3use bc_crypto::hash::crc32;
4use miniz_oxide::{ inflate::decompress_to_vec, deflate::compress_to_vec };
5use crate::{ digest::Digest, DigestProvider, tags };
6use anyhow::{ anyhow, bail, Result };
7
8/// A compressed binary object with integrity verification.
9///
10/// `Compressed` provides a way to efficiently store and transmit binary data using
11/// the DEFLATE compression algorithm. It includes built-in integrity verification
12/// through a CRC32 checksum and optional cryptographic digest.
13///
14/// The compression is implemented using the raw DEFLATE format as described in
15/// [IETF RFC 1951](https://www.ietf.org/rfc/rfc1951.txt) with the following
16/// configuration equivalent to:
17///
18/// `deflateInit2(zstream, 5, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY)`
19///
20/// Features:
21/// - Automatic compression with configurable compression level
22/// - Integrity verification via CRC32 checksum
23/// - Optional cryptographic digest for content identification
24/// - Smart behavior for small data (stores uncompressed if compression would increase size)
25/// - CBOR serialization/deserialization support
26#[derive(Clone, Eq, PartialEq)]
27pub struct Compressed {
28    /// CRC32 checksum of the uncompressed data for integrity verification
29    checksum: u32,
30    /// Size of the original uncompressed data in bytes
31    uncompressed_size: usize,
32    /// The compressed data (or original data if compression is ineffective)
33    compressed_data: Vec<u8>,
34    /// Optional cryptographic digest of the content
35    digest: Option<Digest>,
36}
37
38impl Compressed {
39    /// Creates a new `Compressed` object with the specified parameters.
40    ///
41    /// This is a low-level constructor that allows direct creation of a `Compressed`
42    /// object without performing compression. It's primarily intended for deserialization
43    /// or when working with pre-compressed data.
44    ///
45    /// # Parameters
46    ///
47    /// * `checksum` - CRC32 checksum of the uncompressed data
48    /// * `uncompressed_size` - Size of the original uncompressed data in bytes
49    /// * `compressed_data` - The compressed data bytes
50    /// * `digest` - Optional cryptographic digest of the content
51    ///
52    /// # Returns
53    ///
54    /// A `Result` containing the new `Compressed` object if successful,
55    /// or an error if the parameters are invalid.
56    ///
57    /// # Errors
58    ///
59    /// Returns an error if the compressed data is larger than the uncompressed size,
60    /// which would indicate a logical inconsistency.
61    ///
62    /// # Example
63    ///
64    /// ```
65    /// use bc_components::Compressed;
66    /// use bc_crypto::hash::crc32;
67    ///
68    /// let data = b"hello world";
69    /// let checksum = crc32(data);
70    /// let uncompressed_size = data.len();
71    ///
72    /// // In a real scenario, this would be actually compressed data
73    /// let compressed_data = data.to_vec();
74    ///
75    /// let compressed = Compressed::new(
76    ///     checksum,
77    ///     uncompressed_size,
78    ///     compressed_data,
79    ///     None
80    /// ).unwrap();
81    /// ```
82    pub fn new(
83        checksum: u32,
84        uncompressed_size: usize,
85        compressed_data: Vec<u8>,
86        digest: Option<Digest>
87    ) -> Result<Self> {
88        if compressed_data.len() > uncompressed_size {
89            bail!("Compressed data is larger than uncompressed size");
90        }
91        Ok(Self {
92            checksum,
93            uncompressed_size,
94            compressed_data,
95            digest,
96        })
97    }
98
99    /// Creates a new `Compressed` object by compressing the provided data.
100    ///
101    /// This is the primary method for creating compressed data. It automatically
102    /// handles compression using the DEFLATE algorithm with a compression level of 6.
103    ///
104    /// If the compressed data would be larger than the original data (which can happen
105    /// with small or already compressed inputs), the original data is stored instead.
106    ///
107    /// # Parameters
108    ///
109    /// * `uncompressed_data` - The original data to compress
110    /// * `digest` - Optional cryptographic digest of the content
111    ///
112    /// # Returns
113    ///
114    /// A new `Compressed` object containing the compressed (or original) data.
115    ///
116    /// # Example
117    ///
118    /// ```
119    /// use bc_components::Compressed;
120    ///
121    /// // Compress a string
122    /// let data = "This is a longer string that should compress well with repeated patterns. \
123    ///            This is a longer string that should compress well with repeated patterns.";
124    /// let compressed = Compressed::from_uncompressed_data(data.as_bytes(), None);
125    ///
126    /// // The compressed size should be smaller than the original
127    /// assert!(compressed.compressed_size() < data.len());
128    ///
129    /// // We can recover the original data
130    /// let uncompressed = compressed.uncompress().unwrap();
131    /// assert_eq!(uncompressed, data.as_bytes());
132    /// ```
133    pub fn from_uncompressed_data(
134        uncompressed_data: impl Into<Vec<u8>>,
135        digest: Option<Digest>
136    ) -> Self {
137        let uncompressed_data = uncompressed_data.into();
138        let compressed_data = compress_to_vec(&uncompressed_data, 6);
139        let checksum = crc32(&uncompressed_data);
140        let uncompressed_size = uncompressed_data.len();
141        let compressed_size = compressed_data.len();
142        if compressed_size != 0 && compressed_size < uncompressed_size {
143            Self {
144                checksum,
145                uncompressed_size,
146                compressed_data,
147                digest,
148            }
149        } else {
150            Self {
151                checksum,
152                uncompressed_size,
153                compressed_data: uncompressed_data,
154                digest,
155            }
156        }
157    }
158
159    /// Decompresses and returns the original uncompressed data.
160    ///
161    /// This method performs the reverse of the compression process, restoring
162    /// the original data. It also verifies the integrity of the data using the
163    /// stored checksum.
164    ///
165    /// # Returns
166    ///
167    /// A `Result` containing the uncompressed data if successful,
168    /// or an error if decompression fails or the checksum doesn't match.
169    ///
170    /// # Errors
171    ///
172    /// Returns an error if:
173    /// - The compressed data is corrupt and cannot be decompressed
174    /// - The checksum of the decompressed data doesn't match the stored checksum
175    ///
176    /// # Example
177    ///
178    /// ```
179    /// use bc_components::Compressed;
180    ///
181    /// // Original data
182    /// let original = b"This is some example data to compress";
183    ///
184    /// // Compress it
185    /// let compressed = Compressed::from_uncompressed_data(original, None);
186    ///
187    /// // Uncompress to get the original data back
188    /// let uncompressed = compressed.uncompress().unwrap();
189    /// assert_eq!(uncompressed, original);
190    /// ```
191    pub fn uncompress(&self) -> Result<Vec<u8>> {
192        let compressed_size = self.compressed_data.len();
193        if compressed_size >= self.uncompressed_size {
194            return Ok(self.compressed_data.clone());
195        }
196
197        let uncompressed_data = decompress_to_vec(&self.compressed_data).map_err(|_|
198            anyhow!("corrupt compressed data")
199        )?;
200        if crc32(&uncompressed_data) != self.checksum {
201            bail!("compressed data checksum mismatch");
202        }
203
204        Ok(uncompressed_data)
205    }
206
207    /// Returns the size of the compressed data in bytes.
208    ///
209    /// # Returns
210    ///
211    /// The size of the compressed data in bytes.
212    ///
213    /// # Example
214    ///
215    /// ```
216    /// use bc_components::Compressed;
217    ///
218    /// let data = b"Hello world!";
219    /// let compressed = Compressed::from_uncompressed_data(data, None);
220    ///
221    /// // For small inputs like this, compression might not be effective
222    /// // so the compressed_size might equal the original size
223    /// println!("Compressed size: {}", compressed.compressed_size());
224    /// ```
225    pub fn compressed_size(&self) -> usize {
226        self.compressed_data.len()
227    }
228
229    /// Returns the compression ratio of the data.
230    ///
231    /// The compression ratio is calculated as (compressed size) / (uncompressed size),
232    /// so lower values indicate better compression.
233    ///
234    /// # Returns
235    ///
236    /// A floating-point value representing the compression ratio.
237    /// - Values less than 1.0 indicate effective compression
238    /// - Values equal to 1.0 indicate no compression was applied
239    /// - Values of NaN can occur if the uncompressed size is zero
240    ///
241    /// # Example
242    ///
243    /// ```
244    /// use bc_components::Compressed;
245    ///
246    /// // A string with a lot of repetition should compress well
247    /// let data = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
248    /// let compressed = Compressed::from_uncompressed_data(data.as_bytes(), None);
249    ///
250    /// // Should have a good compression ratio (much less than 1.0)
251    /// let ratio = compressed.compression_ratio();
252    /// assert!(ratio < 0.5);
253    /// ```
254    pub fn compression_ratio(&self) -> f64 {
255        (self.compressed_size() as f64) / (self.uncompressed_size as f64)
256    }
257
258    /// Returns a reference to the digest of the compressed data, if available.
259    ///
260    /// # Returns
261    ///
262    /// An optional reference to the `Digest` associated with this compressed data.
263    ///
264    /// # Example
265    ///
266    /// ```
267    /// use bc_components::{Compressed, Digest};
268    ///
269    /// let data = b"Hello world!";
270    /// let digest = Digest::from_image(data);
271    /// let compressed = Compressed::from_uncompressed_data(data, Some(digest.clone()));
272    ///
273    /// // We can retrieve the digest we associated with the compressed data
274    /// assert_eq!(compressed.digest_ref_opt(), Some(&digest));
275    /// ```
276    pub fn digest_ref_opt(&self) -> Option<&Digest> {
277        self.digest.as_ref()
278    }
279
280    /// Returns whether this compressed data has an associated digest.
281    ///
282    /// # Returns
283    ///
284    /// `true` if this compressed data has a digest, `false` otherwise.
285    ///
286    /// # Example
287    ///
288    /// ```
289    /// use bc_components::{Compressed, Digest};
290    ///
291    /// // Create compressed data without a digest
292    /// let compressed1 = Compressed::from_uncompressed_data(b"Hello", None);
293    /// assert!(!compressed1.has_digest());
294    ///
295    /// // Create compressed data with a digest
296    /// let digest = Digest::from_image(b"Hello");
297    /// let compressed2 = Compressed::from_uncompressed_data(b"Hello", Some(digest));
298    /// assert!(compressed2.has_digest());
299    /// ```
300    pub fn has_digest(&self) -> bool {
301        self.digest.is_some()
302    }
303}
304
305/// Implementation of the `DigestProvider` trait for `Compressed`.
306///
307/// Allows `Compressed` objects with digests to be used with APIs that accept
308/// `DigestProvider` implementations.
309impl DigestProvider for Compressed {
310    /// Returns the cryptographic digest associated with this compressed data.
311    ///
312    /// # Returns
313    ///
314    /// A `Cow<'_, Digest>` containing the digest.
315    ///
316    /// # Panics
317    ///
318    /// Panics if there is no digest associated with this compressed data.
319    /// Use `has_digest()` or `digest_ref_opt()` to check before calling this method.
320    fn digest(&self) -> Cow<'_, Digest> {
321        Cow::Owned(self.digest.as_ref().unwrap().clone())
322    }
323}
324
325/// Implementation of the `Debug` trait for `Compressed`.
326///
327/// Provides a human-readable debug representation of a `Compressed` object
328/// showing its key properties: checksum, sizes, compression ratio, and digest.
329impl std::fmt::Debug for Compressed {
330    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
331        write!(
332            f,
333            "Compressed(checksum: {}, size: {}/{}, ratio: {:.2}, digest: {})",
334            hex::encode(self.checksum.to_be_bytes()),
335            self.compressed_size(),
336            self.uncompressed_size,
337            self.compression_ratio(),
338            self
339                .digest_ref_opt()
340                .map(|d| d.short_description())
341                .unwrap_or_else(|| "None".to_string())
342        )
343    }
344}
345
346/// Implementation of `AsRef<Compressed>` for `Compressed`.
347///
348/// This allows passing a `Compressed` instance to functions that take
349/// `AsRef<Compressed>` parameters.
350impl AsRef<Compressed> for Compressed {
351    fn as_ref(&self) -> &Compressed {
352        self
353    }
354}
355
356/// Implementation of the `CBORTagged` trait for `Compressed`.
357///
358/// Defines the CBOR tag(s) used when serializing a `Compressed` object.
359impl CBORTagged for Compressed {
360    fn cbor_tags() -> Vec<Tag> {
361        tags_for_values(&[tags::TAG_COMPRESSED])
362    }
363}
364
365/// Conversion from `Compressed` to CBOR for serialization.
366impl From<Compressed> for CBOR {
367    fn from(value: Compressed) -> Self {
368        value.tagged_cbor()
369    }
370}
371
372/// Implementation of CBOR encoding for `Compressed`.
373///
374/// Defines how a `Compressed` object is serialized to untagged CBOR.
375/// The format is:
376/// ```text
377/// [
378///   checksum: uint,
379///   uncompressed_size: uint,
380///   compressed_data: bytes,
381///   digest?: Digest  // Optional
382/// ]
383/// ```
384impl CBORTaggedEncodable for Compressed {
385    fn untagged_cbor(&self) -> CBOR {
386        let mut elements = vec![
387            self.checksum.into(),
388            self.uncompressed_size.into(),
389            CBOR::to_byte_string(&self.compressed_data)
390        ];
391        if let Some(digest) = self.digest.clone() {
392            elements.push(digest.into());
393        }
394        CBORCase::Array(elements).into()
395    }
396}
397
398/// Conversion from CBOR to `Compressed` for deserialization.
399impl TryFrom<CBOR> for Compressed {
400    type Error = dcbor::Error;
401
402    fn try_from(cbor: CBOR) -> dcbor::Result<Self> {
403        Self::from_tagged_cbor(cbor)
404    }
405}
406
407/// Implementation of CBOR decoding for `Compressed`.
408///
409/// Defines how to create a `Compressed` object from untagged CBOR.
410impl CBORTaggedDecodable for Compressed {
411    fn from_untagged_cbor(cbor: CBOR) -> dcbor::Result<Self> {
412        let elements = cbor.try_into_array()?;
413        if elements.len() < 3 || elements.len() > 4 {
414            return Err("invalid number of elements in compressed".into());
415        }
416        let checksum = elements[0].clone().try_into()?;
417        let uncompressed_size = elements[1].clone().try_into()?;
418        let compressed_data = elements[2].clone().try_into_byte_string()?;
419        let digest = if elements.len() == 4 { Some(elements[3].clone().try_into()?) } else { None };
420        Ok(Self::new(checksum, uncompressed_size, compressed_data, digest)?)
421    }
422}
423
#[cfg(test)]
mod tests {
    use crate::Compressed;

    /// Compresses `source`, compares the `Debug` rendering against
    /// `expected_debug`, then checks that uncompressing yields `source` again.
    fn check_round_trip(source: &[u8], expected_debug: &str) {
        let compressed = Compressed::from_uncompressed_data(source, None);
        assert_eq!(format!("{:?}", compressed), expected_debug);
        assert_eq!(compressed.uncompress().unwrap(), source);
    }

    /// A long text input compresses well.
    #[test]
    fn test_1() {
        check_round_trip(
            b"Lorem ipsum dolor sit amet consectetur adipiscing elit mi nibh ornare proin blandit diam ridiculus, faucibus mus dui eu vehicula nam donec dictumst sed vivamus bibendum aliquet efficitur. Felis imperdiet sodales dictum morbi vivamus augue dis duis aliquet velit ullamcorper porttitor, lobortis dapibus hac purus aliquam natoque iaculis blandit montes nunc pretium.",
            "Compressed(checksum: 3eeb10a0, size: 217/364, ratio: 0.60, digest: None)"
        );
    }

    /// A short text input still compresses, but only slightly.
    #[test]
    fn test_2() {
        check_round_trip(
            b"Lorem ipsum dolor sit amet consectetur adipiscing",
            "Compressed(checksum: 29db1793, size: 45/49, ratio: 0.92, digest: None)"
        );
    }

    /// A tiny input is stored verbatim.
    #[test]
    fn test_3() {
        check_round_trip(
            b"Lorem",
            "Compressed(checksum: 44989b39, size: 5/5, ratio: 1.00, digest: None)"
        );
    }

    /// Empty input yields an empty payload and a NaN ratio.
    #[test]
    fn test_4() {
        check_round_trip(
            b"",
            "Compressed(checksum: 00000000, size: 0/0, ratio: NaN, digest: None)"
        );
    }
}