bc_components/compressed.rs
1use std::{ fmt::Formatter, borrow::Cow };
2use bc_ur::prelude::*;
3use bc_crypto::hash::crc32;
4use miniz_oxide::{ inflate::decompress_to_vec, deflate::compress_to_vec };
5use crate::{ digest::Digest, DigestProvider, tags };
6use anyhow::{ anyhow, bail, Result };
7
8/// A compressed binary object with integrity verification.
9///
10/// `Compressed` provides a way to efficiently store and transmit binary data using
11/// the DEFLATE compression algorithm. It includes built-in integrity verification
12/// through a CRC32 checksum and optional cryptographic digest.
13///
14/// The compression is implemented using the raw DEFLATE format as described in
15/// [IETF RFC 1951](https://www.ietf.org/rfc/rfc1951.txt) with the following
16/// configuration equivalent to:
17///
18/// `deflateInit2(zstream, 5, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY)`
19///
20/// Features:
21/// - Automatic compression with configurable compression level
22/// - Integrity verification via CRC32 checksum
23/// - Optional cryptographic digest for content identification
24/// - Smart behavior for small data (stores uncompressed if compression would increase size)
25/// - CBOR serialization/deserialization support
26#[derive(Clone, Eq, PartialEq)]
27pub struct Compressed {
28 /// CRC32 checksum of the uncompressed data for integrity verification
29 checksum: u32,
30 /// Size of the original uncompressed data in bytes
31 uncompressed_size: usize,
32 /// The compressed data (or original data if compression is ineffective)
33 compressed_data: Vec<u8>,
34 /// Optional cryptographic digest of the content
35 digest: Option<Digest>,
36}
37
38impl Compressed {
39 /// Creates a new `Compressed` object with the specified parameters.
40 ///
41 /// This is a low-level constructor that allows direct creation of a `Compressed`
42 /// object without performing compression. It's primarily intended for deserialization
43 /// or when working with pre-compressed data.
44 ///
45 /// # Parameters
46 ///
47 /// * `checksum` - CRC32 checksum of the uncompressed data
48 /// * `uncompressed_size` - Size of the original uncompressed data in bytes
49 /// * `compressed_data` - The compressed data bytes
50 /// * `digest` - Optional cryptographic digest of the content
51 ///
52 /// # Returns
53 ///
54 /// A `Result` containing the new `Compressed` object if successful,
55 /// or an error if the parameters are invalid.
56 ///
57 /// # Errors
58 ///
59 /// Returns an error if the compressed data is larger than the uncompressed size,
60 /// which would indicate a logical inconsistency.
61 ///
62 /// # Example
63 ///
64 /// ```
65 /// use bc_components::Compressed;
66 /// use bc_crypto::hash::crc32;
67 ///
68 /// let data = b"hello world";
69 /// let checksum = crc32(data);
70 /// let uncompressed_size = data.len();
71 ///
72 /// // In a real scenario, this would be actually compressed data
73 /// let compressed_data = data.to_vec();
74 ///
75 /// let compressed = Compressed::new(
76 /// checksum,
77 /// uncompressed_size,
78 /// compressed_data,
79 /// None
80 /// ).unwrap();
81 /// ```
82 pub fn new(
83 checksum: u32,
84 uncompressed_size: usize,
85 compressed_data: Vec<u8>,
86 digest: Option<Digest>
87 ) -> Result<Self> {
88 if compressed_data.len() > uncompressed_size {
89 bail!("Compressed data is larger than uncompressed size");
90 }
91 Ok(Self {
92 checksum,
93 uncompressed_size,
94 compressed_data,
95 digest,
96 })
97 }
98
99 /// Creates a new `Compressed` object by compressing the provided data.
100 ///
101 /// This is the primary method for creating compressed data. It automatically
102 /// handles compression using the DEFLATE algorithm with a compression level of 6.
103 ///
104 /// If the compressed data would be larger than the original data (which can happen
105 /// with small or already compressed inputs), the original data is stored instead.
106 ///
107 /// # Parameters
108 ///
109 /// * `uncompressed_data` - The original data to compress
110 /// * `digest` - Optional cryptographic digest of the content
111 ///
112 /// # Returns
113 ///
114 /// A new `Compressed` object containing the compressed (or original) data.
115 ///
116 /// # Example
117 ///
118 /// ```
119 /// use bc_components::Compressed;
120 ///
121 /// // Compress a string
122 /// let data = "This is a longer string that should compress well with repeated patterns. \
123 /// This is a longer string that should compress well with repeated patterns.";
124 /// let compressed = Compressed::from_uncompressed_data(data.as_bytes(), None);
125 ///
126 /// // The compressed size should be smaller than the original
127 /// assert!(compressed.compressed_size() < data.len());
128 ///
129 /// // We can recover the original data
130 /// let uncompressed = compressed.uncompress().unwrap();
131 /// assert_eq!(uncompressed, data.as_bytes());
132 /// ```
133 pub fn from_uncompressed_data(
134 uncompressed_data: impl Into<Vec<u8>>,
135 digest: Option<Digest>
136 ) -> Self {
137 let uncompressed_data = uncompressed_data.into();
138 let compressed_data = compress_to_vec(&uncompressed_data, 6);
139 let checksum = crc32(&uncompressed_data);
140 let uncompressed_size = uncompressed_data.len();
141 let compressed_size = compressed_data.len();
142 if compressed_size != 0 && compressed_size < uncompressed_size {
143 Self {
144 checksum,
145 uncompressed_size,
146 compressed_data,
147 digest,
148 }
149 } else {
150 Self {
151 checksum,
152 uncompressed_size,
153 compressed_data: uncompressed_data,
154 digest,
155 }
156 }
157 }
158
159 /// Decompresses and returns the original uncompressed data.
160 ///
161 /// This method performs the reverse of the compression process, restoring
162 /// the original data. It also verifies the integrity of the data using the
163 /// stored checksum.
164 ///
165 /// # Returns
166 ///
167 /// A `Result` containing the uncompressed data if successful,
168 /// or an error if decompression fails or the checksum doesn't match.
169 ///
170 /// # Errors
171 ///
172 /// Returns an error if:
173 /// - The compressed data is corrupt and cannot be decompressed
174 /// - The checksum of the decompressed data doesn't match the stored checksum
175 ///
176 /// # Example
177 ///
178 /// ```
179 /// use bc_components::Compressed;
180 ///
181 /// // Original data
182 /// let original = b"This is some example data to compress";
183 ///
184 /// // Compress it
185 /// let compressed = Compressed::from_uncompressed_data(original, None);
186 ///
187 /// // Uncompress to get the original data back
188 /// let uncompressed = compressed.uncompress().unwrap();
189 /// assert_eq!(uncompressed, original);
190 /// ```
191 pub fn uncompress(&self) -> Result<Vec<u8>> {
192 let compressed_size = self.compressed_data.len();
193 if compressed_size >= self.uncompressed_size {
194 return Ok(self.compressed_data.clone());
195 }
196
197 let uncompressed_data = decompress_to_vec(&self.compressed_data).map_err(|_|
198 anyhow!("corrupt compressed data")
199 )?;
200 if crc32(&uncompressed_data) != self.checksum {
201 bail!("compressed data checksum mismatch");
202 }
203
204 Ok(uncompressed_data)
205 }
206
207 /// Returns the size of the compressed data in bytes.
208 ///
209 /// # Returns
210 ///
211 /// The size of the compressed data in bytes.
212 ///
213 /// # Example
214 ///
215 /// ```
216 /// use bc_components::Compressed;
217 ///
218 /// let data = b"Hello world!";
219 /// let compressed = Compressed::from_uncompressed_data(data, None);
220 ///
221 /// // For small inputs like this, compression might not be effective
222 /// // so the compressed_size might equal the original size
223 /// println!("Compressed size: {}", compressed.compressed_size());
224 /// ```
225 pub fn compressed_size(&self) -> usize {
226 self.compressed_data.len()
227 }
228
229 /// Returns the compression ratio of the data.
230 ///
231 /// The compression ratio is calculated as (compressed size) / (uncompressed size),
232 /// so lower values indicate better compression.
233 ///
234 /// # Returns
235 ///
236 /// A floating-point value representing the compression ratio.
237 /// - Values less than 1.0 indicate effective compression
238 /// - Values equal to 1.0 indicate no compression was applied
239 /// - Values of NaN can occur if the uncompressed size is zero
240 ///
241 /// # Example
242 ///
243 /// ```
244 /// use bc_components::Compressed;
245 ///
246 /// // A string with a lot of repetition should compress well
247 /// let data = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
248 /// let compressed = Compressed::from_uncompressed_data(data.as_bytes(), None);
249 ///
250 /// // Should have a good compression ratio (much less than 1.0)
251 /// let ratio = compressed.compression_ratio();
252 /// assert!(ratio < 0.5);
253 /// ```
254 pub fn compression_ratio(&self) -> f64 {
255 (self.compressed_size() as f64) / (self.uncompressed_size as f64)
256 }
257
258 /// Returns a reference to the digest of the compressed data, if available.
259 ///
260 /// # Returns
261 ///
262 /// An optional reference to the `Digest` associated with this compressed data.
263 ///
264 /// # Example
265 ///
266 /// ```
267 /// use bc_components::{Compressed, Digest};
268 ///
269 /// let data = b"Hello world!";
270 /// let digest = Digest::from_image(data);
271 /// let compressed = Compressed::from_uncompressed_data(data, Some(digest.clone()));
272 ///
273 /// // We can retrieve the digest we associated with the compressed data
274 /// assert_eq!(compressed.digest_ref_opt(), Some(&digest));
275 /// ```
276 pub fn digest_ref_opt(&self) -> Option<&Digest> {
277 self.digest.as_ref()
278 }
279
280 /// Returns whether this compressed data has an associated digest.
281 ///
282 /// # Returns
283 ///
284 /// `true` if this compressed data has a digest, `false` otherwise.
285 ///
286 /// # Example
287 ///
288 /// ```
289 /// use bc_components::{Compressed, Digest};
290 ///
291 /// // Create compressed data without a digest
292 /// let compressed1 = Compressed::from_uncompressed_data(b"Hello", None);
293 /// assert!(!compressed1.has_digest());
294 ///
295 /// // Create compressed data with a digest
296 /// let digest = Digest::from_image(b"Hello");
297 /// let compressed2 = Compressed::from_uncompressed_data(b"Hello", Some(digest));
298 /// assert!(compressed2.has_digest());
299 /// ```
300 pub fn has_digest(&self) -> bool {
301 self.digest.is_some()
302 }
303}
304
305/// Implementation of the `DigestProvider` trait for `Compressed`.
306///
307/// Allows `Compressed` objects with digests to be used with APIs that accept
308/// `DigestProvider` implementations.
309impl DigestProvider for Compressed {
310 /// Returns the cryptographic digest associated with this compressed data.
311 ///
312 /// # Returns
313 ///
314 /// A `Cow<'_, Digest>` containing the digest.
315 ///
316 /// # Panics
317 ///
318 /// Panics if there is no digest associated with this compressed data.
319 /// Use `has_digest()` or `digest_ref_opt()` to check before calling this method.
320 fn digest(&self) -> Cow<'_, Digest> {
321 Cow::Owned(self.digest.as_ref().unwrap().clone())
322 }
323}
324
325/// Implementation of the `Debug` trait for `Compressed`.
326///
327/// Provides a human-readable debug representation of a `Compressed` object
328/// showing its key properties: checksum, sizes, compression ratio, and digest.
329impl std::fmt::Debug for Compressed {
330 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
331 write!(
332 f,
333 "Compressed(checksum: {}, size: {}/{}, ratio: {:.2}, digest: {})",
334 hex::encode(self.checksum.to_be_bytes()),
335 self.compressed_size(),
336 self.uncompressed_size,
337 self.compression_ratio(),
338 self
339 .digest_ref_opt()
340 .map(|d| d.short_description())
341 .unwrap_or_else(|| "None".to_string())
342 )
343 }
344}
345
346/// Implementation of `AsRef<Compressed>` for `Compressed`.
347///
348/// This allows passing a `Compressed` instance to functions that take
349/// `AsRef<Compressed>` parameters.
350impl AsRef<Compressed> for Compressed {
351 fn as_ref(&self) -> &Compressed {
352 self
353 }
354}
355
356/// Implementation of the `CBORTagged` trait for `Compressed`.
357///
358/// Defines the CBOR tag(s) used when serializing a `Compressed` object.
359impl CBORTagged for Compressed {
360 fn cbor_tags() -> Vec<Tag> {
361 tags_for_values(&[tags::TAG_COMPRESSED])
362 }
363}
364
365/// Conversion from `Compressed` to CBOR for serialization.
366impl From<Compressed> for CBOR {
367 fn from(value: Compressed) -> Self {
368 value.tagged_cbor()
369 }
370}
371
372/// Implementation of CBOR encoding for `Compressed`.
373///
374/// Defines how a `Compressed` object is serialized to untagged CBOR.
375/// The format is:
376/// ```text
377/// [
378/// checksum: uint,
379/// uncompressed_size: uint,
380/// compressed_data: bytes,
381/// digest?: Digest // Optional
382/// ]
383/// ```
384impl CBORTaggedEncodable for Compressed {
385 fn untagged_cbor(&self) -> CBOR {
386 let mut elements = vec![
387 self.checksum.into(),
388 self.uncompressed_size.into(),
389 CBOR::to_byte_string(&self.compressed_data)
390 ];
391 if let Some(digest) = self.digest.clone() {
392 elements.push(digest.into());
393 }
394 CBORCase::Array(elements).into()
395 }
396}
397
398/// Conversion from CBOR to `Compressed` for deserialization.
399impl TryFrom<CBOR> for Compressed {
400 type Error = dcbor::Error;
401
402 fn try_from(cbor: CBOR) -> dcbor::Result<Self> {
403 Self::from_tagged_cbor(cbor)
404 }
405}
406
407/// Implementation of CBOR decoding for `Compressed`.
408///
409/// Defines how to create a `Compressed` object from untagged CBOR.
410impl CBORTaggedDecodable for Compressed {
411 fn from_untagged_cbor(cbor: CBOR) -> dcbor::Result<Self> {
412 let elements = cbor.try_into_array()?;
413 if elements.len() < 3 || elements.len() > 4 {
414 return Err("invalid number of elements in compressed".into());
415 }
416 let checksum = elements[0].clone().try_into()?;
417 let uncompressed_size = elements[1].clone().try_into()?;
418 let compressed_data = elements[2].clone().try_into_byte_string()?;
419 let digest = if elements.len() == 4 { Some(elements[3].clone().try_into()?) } else { None };
420 Ok(Self::new(checksum, uncompressed_size, compressed_data, digest)?)
421 }
422}
423
#[cfg(test)]
mod tests {
    use crate::Compressed;

    /// Compresses `source`, checks the `Debug` rendering against
    /// `expected_debug`, and confirms the data round-trips unchanged.
    fn assert_round_trip(source: &[u8], expected_debug: &str) {
        let compressed = Compressed::from_uncompressed_data(source, None);
        assert_eq!(format!("{:?}", compressed), expected_debug);
        assert_eq!(compressed.uncompress().unwrap(), source);
    }

    /// Long text: compression is effective.
    #[test]
    fn test_1() {
        assert_round_trip(
            b"Lorem ipsum dolor sit amet consectetur adipiscing elit mi nibh ornare proin blandit diam ridiculus, faucibus mus dui eu vehicula nam donec dictumst sed vivamus bibendum aliquet efficitur. Felis imperdiet sodales dictum morbi vivamus augue dis duis aliquet velit ullamcorper porttitor, lobortis dapibus hac purus aliquam natoque iaculis blandit montes nunc pretium.",
            "Compressed(checksum: 3eeb10a0, size: 217/364, ratio: 0.60, digest: None)"
        );
    }

    /// Short text: compression yields only a small saving.
    #[test]
    fn test_2() {
        assert_round_trip(
            b"Lorem ipsum dolor sit amet consectetur adipiscing",
            "Compressed(checksum: 29db1793, size: 45/49, ratio: 0.92, digest: None)"
        );
    }

    /// Tiny input: stored verbatim because compression would not shrink it.
    #[test]
    fn test_3() {
        assert_round_trip(
            b"Lorem",
            "Compressed(checksum: 44989b39, size: 5/5, ratio: 1.00, digest: None)"
        );
    }

    /// Empty input: sizes are zero and the ratio is NaN.
    #[test]
    fn test_4() {
        assert_round_trip(
            b"",
            "Compressed(checksum: 00000000, size: 0/0, ratio: NaN, digest: None)"
        );
    }
}