bc_components/
compressed.rs

1use std::{borrow::Cow, fmt::Formatter};
2
3use anyhow::{Result, anyhow, bail};
4use bc_crypto::hash::crc32;
5use bc_ur::prelude::*;
6use miniz_oxide::{deflate::compress_to_vec, inflate::decompress_to_vec};
7
8use crate::{DigestProvider, digest::Digest, tags};
9
10/// A compressed binary object with integrity verification.
11///
12/// `Compressed` provides a way to efficiently store and transmit binary data
13/// using the DEFLATE compression algorithm. It includes built-in integrity
14/// verification through a CRC32 checksum and optional cryptographic digest.
15///
16/// The compression is implemented using the raw DEFLATE format as described in
17/// [IETF RFC 1951](https://www.ietf.org/rfc/rfc1951.txt) with the following
18/// configuration equivalent to:
19///
20/// `deflateInit2(zstream, 5, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY)`
21///
22/// Features:
23/// - Automatic compression with configurable compression level
24/// - Integrity verification via CRC32 checksum
25/// - Optional cryptographic digest for content identification
26/// - Smart behavior for small data (stores uncompressed if compression would
27///   increase size)
28/// - CBOR serialization/deserialization support
29#[derive(Clone, Eq, PartialEq)]
30pub struct Compressed {
31    /// CRC32 checksum of the uncompressed data for integrity verification
32    checksum: u32,
33    /// Size of the original uncompressed data in bytes
34    uncompressed_size: usize,
35    /// The compressed data (or original data if compression is ineffective)
36    compressed_data: Vec<u8>,
37    /// Optional cryptographic digest of the content
38    digest: Option<Digest>,
39}
40
41impl Compressed {
42    /// Creates a new `Compressed` object with the specified parameters.
43    ///
44    /// This is a low-level constructor that allows direct creation of a
45    /// `Compressed` object without performing compression. It's primarily
46    /// intended for deserialization or when working with pre-compressed
47    /// data.
48    ///
49    /// # Parameters
50    ///
51    /// * `checksum` - CRC32 checksum of the uncompressed data
52    /// * `uncompressed_size` - Size of the original uncompressed data in bytes
53    /// * `compressed_data` - The compressed data bytes
54    /// * `digest` - Optional cryptographic digest of the content
55    ///
56    /// # Returns
57    ///
58    /// A `Result` containing the new `Compressed` object if successful,
59    /// or an error if the parameters are invalid.
60    ///
61    /// # Errors
62    ///
63    /// Returns an error if the compressed data is larger than the uncompressed
64    /// size, which would indicate a logical inconsistency.
65    ///
66    /// # Example
67    ///
68    /// ```
69    /// use bc_components::Compressed;
70    /// use bc_crypto::hash::crc32;
71    ///
72    /// let data = b"hello world";
73    /// let checksum = crc32(data);
74    /// let uncompressed_size = data.len();
75    ///
76    /// // In a real scenario, this would be actually compressed data
77    /// let compressed_data = data.to_vec();
78    ///
79    /// let compressed =
80    ///     Compressed::new(checksum, uncompressed_size, compressed_data, None)
81    ///         .unwrap();
82    /// ```
83    pub fn new(
84        checksum: u32,
85        uncompressed_size: usize,
86        compressed_data: Vec<u8>,
87        digest: Option<Digest>,
88    ) -> Result<Self> {
89        if compressed_data.len() > uncompressed_size {
90            bail!("Compressed data is larger than uncompressed size");
91        }
92        Ok(Self {
93            checksum,
94            uncompressed_size,
95            compressed_data,
96            digest,
97        })
98    }
99
100    /// Creates a new `Compressed` object by compressing the provided data.
101    ///
102    /// This is the primary method for creating compressed data. It
103    /// automatically handles compression using the DEFLATE algorithm with a
104    /// compression level of 6.
105    ///
106    /// If the compressed data would be larger than the original data (which can
107    /// happen with small or already compressed inputs), the original data
108    /// is stored instead.
109    ///
110    /// # Parameters
111    ///
112    /// * `uncompressed_data` - The original data to compress
113    /// * `digest` - Optional cryptographic digest of the content
114    ///
115    /// # Returns
116    ///
117    /// A new `Compressed` object containing the compressed (or original) data.
118    ///
119    /// # Example
120    ///
121    /// ```
122    /// use bc_components::Compressed;
123    ///
124    /// // Compress a string
125    /// let data = "This is a longer string that should compress well with repeated patterns. \
126    ///            This is a longer string that should compress well with repeated patterns.";
127    /// let compressed = Compressed::from_uncompressed_data(data.as_bytes(), None);
128    ///
129    /// // The compressed size should be smaller than the original
130    /// assert!(compressed.compressed_size() < data.len());
131    ///
132    /// // We can recover the original data
133    /// let uncompressed = compressed.uncompress().unwrap();
134    /// assert_eq!(uncompressed, data.as_bytes());
135    /// ```
136    pub fn from_uncompressed_data(
137        uncompressed_data: impl AsRef<[u8]>,
138        digest: Option<Digest>,
139    ) -> Self {
140        let uncompressed_data = uncompressed_data.as_ref();
141        let compressed_data = compress_to_vec(uncompressed_data, 6);
142        let checksum = crc32(uncompressed_data);
143        let uncompressed_size = uncompressed_data.len();
144        let compressed_size = compressed_data.len();
145        if compressed_size != 0 && compressed_size < uncompressed_size {
146            Self {
147                checksum,
148                uncompressed_size,
149                compressed_data,
150                digest,
151            }
152        } else {
153            Self {
154                checksum,
155                uncompressed_size,
156                compressed_data: uncompressed_data.to_vec(),
157                digest,
158            }
159        }
160    }
161
162    /// Decompresses and returns the original uncompressed data.
163    ///
164    /// This method performs the reverse of the compression process, restoring
165    /// the original data. It also verifies the integrity of the data using the
166    /// stored checksum.
167    ///
168    /// # Returns
169    ///
170    /// A `Result` containing the uncompressed data if successful,
171    /// or an error if decompression fails or the checksum doesn't match.
172    ///
173    /// # Errors
174    ///
175    /// Returns an error if:
176    /// - The compressed data is corrupt and cannot be decompressed
177    /// - The checksum of the decompressed data doesn't match the stored
178    ///   checksum
179    ///
180    /// # Example
181    ///
182    /// ```
183    /// use bc_components::Compressed;
184    ///
185    /// // Original data
186    /// let original = b"This is some example data to compress";
187    ///
188    /// // Compress it
189    /// let compressed = Compressed::from_uncompressed_data(original, None);
190    ///
191    /// // Uncompress to get the original data back
192    /// let uncompressed = compressed.uncompress().unwrap();
193    /// assert_eq!(uncompressed, original);
194    /// ```
195    pub fn uncompress(&self) -> Result<Vec<u8>> {
196        let compressed_size = self.compressed_data.len();
197        if compressed_size >= self.uncompressed_size {
198            return Ok(self.compressed_data.clone());
199        }
200
201        let uncompressed_data = decompress_to_vec(&self.compressed_data)
202            .map_err(|_| anyhow!("corrupt compressed data"))?;
203        if crc32(&uncompressed_data) != self.checksum {
204            bail!("compressed data checksum mismatch");
205        }
206
207        Ok(uncompressed_data)
208    }
209
210    /// Returns the size of the compressed data in bytes.
211    ///
212    /// # Returns
213    ///
214    /// The size of the compressed data in bytes.
215    ///
216    /// # Example
217    ///
218    /// ```
219    /// use bc_components::Compressed;
220    ///
221    /// let data = b"Hello world!";
222    /// let compressed = Compressed::from_uncompressed_data(data, None);
223    ///
224    /// // For small inputs like this, compression might not be effective
225    /// // so the compressed_size might equal the original size
226    /// println!("Compressed size: {}", compressed.compressed_size());
227    /// ```
228    pub fn compressed_size(&self) -> usize { self.compressed_data.len() }
229
230    /// Returns the compression ratio of the data.
231    ///
232    /// The compression ratio is calculated as (compressed size) / (uncompressed
233    /// size), so lower values indicate better compression.
234    ///
235    /// # Returns
236    ///
237    /// A floating-point value representing the compression ratio.
238    /// - Values less than 1.0 indicate effective compression
239    /// - Values equal to 1.0 indicate no compression was applied
240    /// - Values of NaN can occur if the uncompressed size is zero
241    ///
242    /// # Example
243    ///
244    /// ```
245    /// use bc_components::Compressed;
246    ///
247    /// // A string with a lot of repetition should compress well
248    /// let data = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
249    /// let compressed = Compressed::from_uncompressed_data(data.as_bytes(), None);
250    ///
251    /// // Should have a good compression ratio (much less than 1.0)
252    /// let ratio = compressed.compression_ratio();
253    /// assert!(ratio < 0.5);
254    /// ```
255    pub fn compression_ratio(&self) -> f64 {
256        (self.compressed_size() as f64) / (self.uncompressed_size as f64)
257    }
258
259    /// Returns a reference to the digest of the compressed data, if available.
260    ///
261    /// # Returns
262    ///
263    /// An optional reference to the `Digest` associated with this compressed
264    /// data.
265    ///
266    /// # Example
267    ///
268    /// ```
269    /// use bc_components::{Compressed, Digest};
270    ///
271    /// let data = b"Hello world!";
272    /// let digest = Digest::from_image(data);
273    /// let compressed =
274    ///     Compressed::from_uncompressed_data(data, Some(digest.clone()));
275    ///
276    /// // We can retrieve the digest we associated with the compressed data
277    /// assert_eq!(compressed.digest_ref_opt(), Some(&digest));
278    /// ```
279    pub fn digest_ref_opt(&self) -> Option<&Digest> { self.digest.as_ref() }
280
281    /// Returns whether this compressed data has an associated digest.
282    ///
283    /// # Returns
284    ///
285    /// `true` if this compressed data has a digest, `false` otherwise.
286    ///
287    /// # Example
288    ///
289    /// ```
290    /// use bc_components::{Compressed, Digest};
291    ///
292    /// // Create compressed data without a digest
293    /// let compressed1 = Compressed::from_uncompressed_data(b"Hello", None);
294    /// assert!(!compressed1.has_digest());
295    ///
296    /// // Create compressed data with a digest
297    /// let digest = Digest::from_image(b"Hello");
298    /// let compressed2 =
299    ///     Compressed::from_uncompressed_data(b"Hello", Some(digest));
300    /// assert!(compressed2.has_digest());
301    /// ```
302    pub fn has_digest(&self) -> bool { self.digest.is_some() }
303}
304
305/// Implementation of the `DigestProvider` trait for `Compressed`.
306///
307/// Allows `Compressed` objects with digests to be used with APIs that accept
308/// `DigestProvider` implementations.
309impl DigestProvider for Compressed {
310    /// Returns the cryptographic digest associated with this compressed data.
311    ///
312    /// # Returns
313    ///
314    /// A `Cow<'_, Digest>` containing the digest.
315    ///
316    /// # Panics
317    ///
318    /// Panics if there is no digest associated with this compressed data.
319    /// Use `has_digest()` or `digest_ref_opt()` to check before calling this
320    /// method.
321    fn digest(&self) -> Cow<'_, Digest> {
322        Cow::Owned(self.digest.as_ref().unwrap().clone())
323    }
324}
325
326/// Implementation of the `Debug` trait for `Compressed`.
327///
328/// Provides a human-readable debug representation of a `Compressed` object
329/// showing its key properties: checksum, sizes, compression ratio, and digest.
330impl std::fmt::Debug for Compressed {
331    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
332        write!(
333            f,
334            "Compressed(checksum: {}, size: {}/{}, ratio: {:.2}, digest: {})",
335            hex::encode(self.checksum.to_be_bytes()),
336            self.compressed_size(),
337            self.uncompressed_size,
338            self.compression_ratio(),
339            self.digest_ref_opt()
340                .map(|d| d.short_description())
341                .unwrap_or_else(|| "None".to_string())
342        )
343    }
344}
345
346/// Implementation of `AsRef<Compressed>` for `Compressed`.
347///
348/// This allows passing a `Compressed` instance to functions that take
349/// `AsRef<Compressed>` parameters.
350impl AsRef<Compressed> for Compressed {
351    fn as_ref(&self) -> &Compressed { self }
352}
353
354/// Implementation of the `CBORTagged` trait for `Compressed`.
355///
356/// Defines the CBOR tag(s) used when serializing a `Compressed` object.
357impl CBORTagged for Compressed {
358    fn cbor_tags() -> Vec<Tag> { tags_for_values(&[tags::TAG_COMPRESSED]) }
359}
360
361/// Conversion from `Compressed` to CBOR for serialization.
362impl From<Compressed> for CBOR {
363    fn from(value: Compressed) -> Self { value.tagged_cbor() }
364}
365
366/// Implementation of CBOR encoding for `Compressed`.
367///
368/// Defines how a `Compressed` object is serialized to untagged CBOR.
369/// The format is:
370/// ```text
371/// [
372///   checksum: uint,
373///   uncompressed_size: uint,
374///   compressed_data: bytes,
375///   digest?: Digest  // Optional
376/// ]
377/// ```
378impl CBORTaggedEncodable for Compressed {
379    fn untagged_cbor(&self) -> CBOR {
380        let mut elements = vec![
381            self.checksum.into(),
382            self.uncompressed_size.into(),
383            CBOR::to_byte_string(&self.compressed_data),
384        ];
385        if let Some(digest) = self.digest.clone() {
386            elements.push(digest.into());
387        }
388        CBORCase::Array(elements).into()
389    }
390}
391
392/// Conversion from CBOR to `Compressed` for deserialization.
393impl TryFrom<CBOR> for Compressed {
394    type Error = dcbor::Error;
395
396    fn try_from(cbor: CBOR) -> dcbor::Result<Self> {
397        Self::from_tagged_cbor(cbor)
398    }
399}
400
401/// Implementation of CBOR decoding for `Compressed`.
402///
403/// Defines how to create a `Compressed` object from untagged CBOR.
404impl CBORTaggedDecodable for Compressed {
405    fn from_untagged_cbor(cbor: CBOR) -> dcbor::Result<Self> {
406        let elements = cbor.try_into_array()?;
407        if elements.len() < 3 || elements.len() > 4 {
408            return Err("invalid number of elements in compressed".into());
409        }
410        let checksum = elements[0].clone().try_into()?;
411        let uncompressed_size = elements[1].clone().try_into()?;
412        let compressed_data = elements[2].clone().try_into_byte_string()?;
413        let digest = if elements.len() == 4 {
414            Some(elements[3].clone().try_into()?)
415        } else {
416            None
417        };
418        Ok(Self::new(
419            checksum,
420            uncompressed_size,
421            compressed_data,
422            digest,
423        )?)
424    }
425}
426
#[cfg(test)]
mod tests {
    use crate::Compressed;

    /// Compresses `source`, checks the `Debug` rendering against
    /// `expected_debug`, and verifies that uncompressing restores `source`.
    fn assert_round_trip(source: &[u8], expected_debug: &str) {
        let compressed = Compressed::from_uncompressed_data(source, None);
        assert_eq!(format!("{:?}", compressed), expected_debug);
        assert_eq!(compressed.uncompress().unwrap(), source);
    }

    /// Long input: DEFLATE gives a clear size reduction.
    #[test]
    fn test_1() {
        assert_round_trip(
            b"Lorem ipsum dolor sit amet consectetur adipiscing elit mi nibh ornare proin blandit diam ridiculus, faucibus mus dui eu vehicula nam donec dictumst sed vivamus bibendum aliquet efficitur. Felis imperdiet sodales dictum morbi vivamus augue dis duis aliquet velit ullamcorper porttitor, lobortis dapibus hac purus aliquam natoque iaculis blandit montes nunc pretium.",
            "Compressed(checksum: 3eeb10a0, size: 217/364, ratio: 0.60, digest: None)",
        );
    }

    /// Medium input: compression still wins, but only marginally.
    #[test]
    fn test_2() {
        assert_round_trip(
            b"Lorem ipsum dolor sit amet consectetur adipiscing",
            "Compressed(checksum: 29db1793, size: 45/49, ratio: 0.92, digest: None)",
        );
    }

    /// Tiny input: the bytes are stored verbatim.
    #[test]
    fn test_3() {
        assert_round_trip(
            b"Lorem",
            "Compressed(checksum: 44989b39, size: 5/5, ratio: 1.00, digest: None)",
        );
    }

    /// Empty input: zero sizes give a NaN ratio.
    #[test]
    fn test_4() {
        assert_round_trip(
            b"",
            "Compressed(checksum: 00000000, size: 0/0, ratio: NaN, digest: None)",
        );
    }
}