ipfrs_core/
utils.rs

1//! Utility functions for common IPFRS operations.
2//!
3//! This module provides convenience functions that simplify common tasks
4//! when working with blocks, CIDs, and IPLD data.
5//!
6//! # Examples
7//!
8//! ```rust
9//! use ipfrs_core::utils;
10//! use bytes::Bytes;
11//!
12//! // Quick block creation with default settings
13//! let block = utils::quick_block(b"Hello, World!").unwrap();
14//! println!("CID: {}", block.cid());
15//!
16//! // Parse CID from string
17//! let cid = utils::parse_cid_string("QmXXX...").ok();
18//! ```
19
20use crate::{Block, Cid, CidBuilder, HashAlgorithm, Ipld, Result};
21use bytes::Bytes;
22use std::collections::BTreeMap;
23
24/// Creates a block from a byte slice using default settings (SHA2-256, CIDv1, raw codec).
25///
26/// This is a convenience function that combines data conversion and block creation.
27///
28/// # Examples
29///
30/// ```rust
31/// use ipfrs_core::utils::quick_block;
32///
33/// let block = quick_block(b"Hello, IPFRS!").unwrap();
34/// assert_eq!(block.data().as_ref(), b"Hello, IPFRS!");
35/// ```
36pub fn quick_block(data: &[u8]) -> Result<Block> {
37    Block::new(Bytes::copy_from_slice(data))
38}
39
40/// Creates a block with a specific hash algorithm.
41///
42/// # Examples
43///
44/// ```rust
45/// use ipfrs_core::{utils::block_with_hash, HashAlgorithm};
46///
47/// let block = block_with_hash(b"data", HashAlgorithm::Sha3_256).unwrap();
48/// ```
49pub fn block_with_hash(data: &[u8], algorithm: HashAlgorithm) -> Result<Block> {
50    crate::BlockBuilder::new()
51        .hash_algorithm(algorithm)
52        .build_from_slice(data)
53}
54
55/// Parses a CID from a string with automatic multibase detection.
56///
57/// # Examples
58///
59/// ```rust
60/// use ipfrs_core::utils::parse_cid_string;
61///
62/// // Parse CIDv0 (base58btc)
63/// let cid_v0 = parse_cid_string("QmXXX...");
64///
65/// // Parse CIDv1 (base32)
66/// let cid_v1 = parse_cid_string("bafyXXX...");
67/// ```
68pub fn parse_cid_string(s: &str) -> Result<Cid> {
69    crate::cid::parse_cid(s)
70}
71
72/// Computes the CID of data using the specified hash algorithm.
73///
74/// # Examples
75///
76/// ```rust
77/// use ipfrs_core::{utils::cid_of, HashAlgorithm};
78///
79/// let cid = cid_of(b"Hello, World!", HashAlgorithm::Sha256).unwrap();
80/// ```
81pub fn cid_of(data: &[u8], algorithm: HashAlgorithm) -> Result<Cid> {
82    CidBuilder::new().hash_algorithm(algorithm).build(data)
83}
84
85/// Computes a SHA2-256 CID (most common).
86///
87/// # Examples
88///
89/// ```rust
90/// use ipfrs_core::utils::sha256_cid;
91///
92/// let cid = sha256_cid(b"Hello, World!").unwrap();
93/// ```
94pub fn sha256_cid(data: &[u8]) -> Result<Cid> {
95    cid_of(data, HashAlgorithm::Sha256)
96}
97
98/// Computes a SHA3-256 CID.
99///
100/// # Examples
101///
102/// ```rust
103/// use ipfrs_core::utils::sha3_cid;
104///
105/// let cid = sha3_cid(b"Hello, World!").unwrap();
106/// ```
107pub fn sha3_cid(data: &[u8]) -> Result<Cid> {
108    cid_of(data, HashAlgorithm::Sha3_256)
109}
110
111/// Computes a SHA2-512 CID (64-byte hash).
112///
113/// # Examples
114///
115/// ```rust
116/// use ipfrs_core::utils::sha512_cid;
117///
118/// let cid = sha512_cid(b"Hello, World!").unwrap();
119/// ```
120pub fn sha512_cid(data: &[u8]) -> Result<Cid> {
121    cid_of(data, HashAlgorithm::Sha512)
122}
123
124/// Computes a SHA3-512 CID (64-byte Keccak hash).
125///
126/// # Examples
127///
128/// ```rust
129/// use ipfrs_core::utils::sha3_512_cid;
130///
131/// let cid = sha3_512_cid(b"Hello, World!").unwrap();
132/// ```
133pub fn sha3_512_cid(data: &[u8]) -> Result<Cid> {
134    cid_of(data, HashAlgorithm::Sha3_512)
135}
136
137/// Computes a BLAKE2b-256 CID (fast, 32-byte hash).
138///
139/// # Examples
140///
141/// ```rust
142/// use ipfrs_core::utils::blake2b256_cid;
143///
144/// let cid = blake2b256_cid(b"Hello, World!").unwrap();
145/// ```
146pub fn blake2b256_cid(data: &[u8]) -> Result<Cid> {
147    cid_of(data, HashAlgorithm::Blake2b256)
148}
149
150/// Computes a BLAKE2b-512 CID (fast, 64-byte hash).
151///
152/// # Examples
153///
154/// ```rust
155/// use ipfrs_core::utils::blake2b512_cid;
156///
157/// let cid = blake2b512_cid(b"Hello, World!").unwrap();
158/// ```
159pub fn blake2b512_cid(data: &[u8]) -> Result<Cid> {
160    cid_of(data, HashAlgorithm::Blake2b512)
161}
162
163/// Computes a BLAKE2s-256 CID (fast, optimized for 8-32 bit platforms).
164///
165/// # Examples
166///
167/// ```rust
168/// use ipfrs_core::utils::blake2s256_cid;
169///
170/// let cid = blake2s256_cid(b"Hello, World!").unwrap();
171/// ```
172pub fn blake2s256_cid(data: &[u8]) -> Result<Cid> {
173    cid_of(data, HashAlgorithm::Blake2s256)
174}
175
176/// Computes a BLAKE3 CID (fastest, modern cryptographic design).
177///
178/// # Examples
179///
180/// ```rust
181/// use ipfrs_core::utils::blake3_cid;
182///
183/// let cid = blake3_cid(b"Hello, World!").unwrap();
184/// ```
185pub fn blake3_cid(data: &[u8]) -> Result<Cid> {
186    cid_of(data, HashAlgorithm::Blake3)
187}
188
189/// Checks if two blocks have the same CID (content equality).
190///
191/// # Examples
192///
193/// ```rust
194/// use ipfrs_core::{utils::quick_block, utils::blocks_equal};
195///
196/// let block1 = quick_block(b"data").unwrap();
197/// let block2 = quick_block(b"data").unwrap();
198/// assert!(blocks_equal(&block1, &block2));
199/// ```
200pub fn blocks_equal(a: &Block, b: &Block) -> bool {
201    a.cid() == b.cid()
202}
203
204/// Verifies that a block's CID matches its content.
205///
206/// Returns true if the block is valid, false otherwise.
207///
208/// # Examples
209///
210/// ```rust
211/// use ipfrs_core::utils::{quick_block, verify_block};
212///
213/// let block = quick_block(b"Hello").unwrap();
214/// assert!(verify_block(&block).unwrap());
215/// ```
216pub fn verify_block(block: &Block) -> Result<bool> {
217    block.verify()
218}
219
220/// Creates an IPLD map from key-value pairs.
221///
222/// # Examples
223///
224/// ```rust
225/// use ipfrs_core::{utils::ipld_map, Ipld};
226///
227/// let map = ipld_map(vec![
228///     ("name", Ipld::String("Alice".to_string())),
229///     ("age", Ipld::Integer(30)),
230/// ]);
231/// ```
232pub fn ipld_map<K: Into<String>>(pairs: Vec<(K, Ipld)>) -> Ipld {
233    let mut map = BTreeMap::new();
234    for (k, v) in pairs {
235        map.insert(k.into(), v);
236    }
237    Ipld::Map(map)
238}
239
240/// Creates an IPLD list from values.
241///
242/// # Examples
243///
244/// ```rust
245/// use ipfrs_core::{utils::ipld_list, Ipld};
246///
247/// let list = ipld_list(vec![
248///     Ipld::Integer(1),
249///     Ipld::Integer(2),
250///     Ipld::Integer(3),
251/// ]);
252/// ```
253pub fn ipld_list(values: Vec<Ipld>) -> Ipld {
254    Ipld::List(values)
255}
256
257/// Encodes IPLD data to DAG-CBOR bytes.
258///
259/// # Examples
260///
261/// ```rust
262/// use ipfrs_core::{utils::ipld_to_cbor, Ipld};
263///
264/// let ipld = Ipld::String("hello".to_string());
265/// let cbor = ipld_to_cbor(&ipld).unwrap();
266/// ```
267pub fn ipld_to_cbor(ipld: &Ipld) -> Result<Vec<u8>> {
268    ipld.to_dag_cbor()
269}
270
271/// Decodes IPLD data from DAG-CBOR bytes.
272///
273/// # Examples
274///
275/// ```rust
276/// use ipfrs_core::{utils::{ipld_to_cbor, ipld_from_cbor}, Ipld};
277///
278/// let ipld = Ipld::String("hello".to_string());
279/// let cbor = ipld_to_cbor(&ipld).unwrap();
280/// let decoded = ipld_from_cbor(&cbor).unwrap();
281/// assert_eq!(ipld, decoded);
282/// ```
283pub fn ipld_from_cbor(data: &[u8]) -> Result<Ipld> {
284    Ipld::from_dag_cbor(data)
285}
286
287/// Encodes IPLD data to DAG-JSON string.
288///
289/// # Examples
290///
291/// ```rust
292/// use ipfrs_core::{utils::ipld_to_json, Ipld};
293///
294/// let ipld = Ipld::String("hello".to_string());
295/// let json = ipld_to_json(&ipld).unwrap();
296/// ```
297pub fn ipld_to_json(ipld: &Ipld) -> Result<String> {
298    ipld.to_dag_json()
299}
300
301/// Decodes IPLD data from DAG-JSON string.
302///
303/// # Examples
304///
305/// ```rust
306/// use ipfrs_core::{utils::{ipld_to_json, ipld_from_json}, Ipld};
307///
308/// let ipld = Ipld::String("hello".to_string());
309/// let json = ipld_to_json(&ipld).unwrap();
310/// let decoded = ipld_from_json(&json).unwrap();
311/// assert_eq!(ipld, decoded);
312/// ```
313pub fn ipld_from_json(data: &str) -> Result<Ipld> {
314    Ipld::from_dag_json(data)
315}
316
/// Renders a byte count as a short human-readable string.
///
/// Uses binary multiples (1 KB = 1024 bytes). Values below 1 KB are printed as
/// a whole number of bytes (`"512 B"`); larger values are printed with two
/// decimals in the largest unit that fits, with GB as the largest unit.
///
/// # Examples
///
/// ```rust
/// use ipfrs_core::utils::format_size;
///
/// assert_eq!(format_size(1024), "1.00 KB");
/// assert_eq!(format_size(1_048_576), "1.00 MB");
/// assert_eq!(format_size(1_073_741_824), "1.00 GB");
/// ```
pub fn format_size(bytes: u64) -> String {
    // Ordered from largest to smallest so the first match is the best fit.
    const UNITS: [(u64, &str); 3] = [
        (1024 * 1024 * 1024, "GB"),
        (1024 * 1024, "MB"),
        (1024, "KB"),
    ];

    for (scale, suffix) in UNITS {
        if bytes >= scale {
            return format!("{:.2} {}", bytes as f64 / scale as f64, suffix);
        }
    }
    format!("{} B", bytes)
}
343
/// Estimates how many chunks data of `data_size` bytes would be split into.
///
/// Assumes the default chunk size of 256 KB and rounds up, so any trailing
/// partial chunk counts as a full one. A size of zero yields zero chunks.
///
/// # Examples
///
/// ```rust
/// use ipfrs_core::utils::estimate_chunks;
///
/// assert_eq!(estimate_chunks(1_000_000), 4); // ~1 MB → 4 chunks
/// ```
pub fn estimate_chunks(data_size: u64) -> usize {
    const CHUNK_BYTES: u64 = 256 * 1024; // 256 KB

    let full_chunks = data_size / CHUNK_BYTES;
    // One extra chunk whenever there is a remainder (ceiling division).
    let partial_chunk = u64::from(data_size % CHUNK_BYTES != 0);
    (full_chunks + partial_chunk) as usize
}
359
360/// Checks if data needs chunking based on the maximum block size.
361///
362/// # Examples
363///
364/// ```rust
365/// use ipfrs_core::utils::needs_chunking;
366///
367/// assert!(!needs_chunking(100)); // Small data
368/// assert!(needs_chunking(3_000_000)); // Large data (> 2 MiB)
369/// ```
370pub fn needs_chunking(data_size: u64) -> bool {
371    data_size > crate::MAX_BLOCK_SIZE as u64
372}
373
374//
375// Diagnostic and Validation Utilities
376//
377
/// Diagnostic snapshot of a CID's components.
///
/// The [`Display`](std::fmt::Display) implementation renders a four-line,
/// indented summary with the codec and hash code shown in hexadecimal.
#[derive(Debug, Clone)]
pub struct CidInfo {
    /// The CID rendered as a string
    pub cid_string: String,
    /// CID version number (0 or 1)
    pub version: u8,
    /// Numeric codec identifier
    pub codec: u64,
    /// Numeric code of the hash algorithm
    pub hash_code: u64,
    /// Length of the hash digest, in bytes
    pub hash_length: usize,
}

impl std::fmt::Display for CidInfo {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "CID: {}", self.cid_string)?;
        writeln!(f, "  Version: {}", self.version)?;
        writeln!(f, "  Codec: 0x{:x}", self.codec)?;
        // Last line carries no trailing newline.
        write!(
            f,
            "  Hash: 0x{:x} ({} bytes)",
            self.hash_code, self.hash_length
        )
    }
}
402
403/// Inspects a CID and returns detailed diagnostic information.
404///
405/// # Examples
406///
407/// ```rust
408/// use ipfrs_core::utils::{sha256_cid, inspect_cid};
409///
410/// let cid = sha256_cid(b"Hello").unwrap();
411/// let info = inspect_cid(&cid);
412/// println!("{}", info);
413/// ```
414pub fn inspect_cid(cid: &Cid) -> CidInfo {
415    CidInfo {
416        cid_string: cid.to_string(),
417        version: match cid.version() {
418            cid::Version::V0 => 0,
419            cid::Version::V1 => 1,
420        },
421        codec: cid.codec(),
422        hash_code: cid.hash().code(),
423        hash_length: cid.hash().digest().len(),
424    }
425}
426
/// Diagnostic snapshot of a block.
///
/// The [`Display`](std::fmt::Display) implementation renders a four-line,
/// indented summary of the fields below.
#[derive(Debug, Clone)]
pub struct BlockInfo {
    /// The block's CID rendered as a string
    pub cid: String,
    /// Payload size in bytes
    pub size: u64,
    /// The same size in human-readable form
    pub size_formatted: String,
    /// `true` when the CID matches the block's content
    pub is_valid: bool,
}

impl std::fmt::Display for BlockInfo {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "Block:")?;
        writeln!(f, "  CID: {}", self.cid)?;
        writeln!(f, "  Size: {} ({})", self.size, self.size_formatted)?;
        // Last line carries no trailing newline.
        write!(f, "  Valid: {}", self.is_valid)
    }
}
449
450/// Inspects a block and returns detailed diagnostic information.
451///
452/// # Examples
453///
454/// ```rust
455/// use ipfrs_core::utils::{quick_block, inspect_block};
456///
457/// let block = quick_block(b"Hello, World!").unwrap();
458/// let info = inspect_block(&block).unwrap();
459/// println!("{}", info);
460/// ```
461pub fn inspect_block(block: &Block) -> Result<BlockInfo> {
462    let is_valid = block.verify()?;
463    Ok(BlockInfo {
464        cid: block.cid().to_string(),
465        size: block.size(),
466        size_formatted: format_size(block.size()),
467        is_valid,
468    })
469}
470
471/// Validates that a string is a valid CID.
472///
473/// # Examples
474///
475/// ```rust
476/// use ipfrs_core::utils::validate_cid_string;
477///
478/// assert!(validate_cid_string("QmXXX").is_ok() || validate_cid_string("QmXXX").is_err());
479/// ```
480pub fn validate_cid_string(s: &str) -> Result<Cid> {
481    parse_cid_string(s)
482}
483
484/// Validates a collection of blocks, returning the number of valid and invalid blocks.
485///
486/// # Examples
487///
488/// ```rust
489/// use ipfrs_core::utils::{quick_block, validate_blocks};
490///
491/// let blocks = vec![
492///     quick_block(b"data1").unwrap(),
493///     quick_block(b"data2").unwrap(),
494/// ];
495/// let (valid, invalid) = validate_blocks(&blocks).unwrap();
496/// assert_eq!(valid, 2);
497/// assert_eq!(invalid, 0);
498/// ```
499pub fn validate_blocks(blocks: &[Block]) -> Result<(usize, usize)> {
500    let mut valid = 0;
501    let mut invalid = 0;
502
503    for block in blocks {
504        if block.verify()? {
505            valid += 1;
506        } else {
507            invalid += 1;
508        }
509    }
510
511    Ok((valid, invalid))
512}
513
514/// Finds blocks in a collection that have invalid CIDs (mismatched content).
515///
516/// # Examples
517///
518/// ```rust
519/// use ipfrs_core::utils::{quick_block, find_invalid_blocks};
520///
521/// let blocks = vec![
522///     quick_block(b"data1").unwrap(),
523///     quick_block(b"data2").unwrap(),
524/// ];
525/// let invalid = find_invalid_blocks(&blocks).unwrap();
526/// assert_eq!(invalid.len(), 0);
527/// ```
528pub fn find_invalid_blocks(blocks: &[Block]) -> Result<Vec<usize>> {
529    let mut invalid_indices = Vec::new();
530
531    for (i, block) in blocks.iter().enumerate() {
532        if !block.verify()? {
533            invalid_indices.push(i);
534        }
535    }
536
537    Ok(invalid_indices)
538}
539
540/// Measures the time taken to generate a CID for the given data.
541///
542/// Returns the duration in microseconds and the generated CID.
543///
544/// # Examples
545///
546/// ```rust
547/// use ipfrs_core::utils::measure_cid_generation;
548/// use ipfrs_core::HashAlgorithm;
549///
550/// let (duration_us, cid) = measure_cid_generation(b"test data", HashAlgorithm::Sha256).unwrap();
551/// assert!(duration_us > 0);
552/// ```
553pub fn measure_cid_generation(data: &[u8], algorithm: HashAlgorithm) -> Result<(u64, Cid)> {
554    let start = std::time::Instant::now();
555    let cid = cid_of(data, algorithm)?;
556    let duration = start.elapsed();
557    Ok((duration.as_micros() as u64, cid))
558}
559
560/// Measures the time taken to create a block from the given data.
561///
562/// Returns the duration in microseconds and the created block.
563///
564/// # Examples
565///
566/// ```rust
567/// use ipfrs_core::utils::measure_block_creation;
568///
569/// let (duration_us, block) = measure_block_creation(b"test data").unwrap();
570/// assert!(duration_us > 0);
571/// ```
572pub fn measure_block_creation(data: &[u8]) -> Result<(u64, Block)> {
573    let start = std::time::Instant::now();
574    let block = quick_block(data)?;
575    let duration = start.elapsed();
576    Ok((duration.as_micros() as u64, block))
577}
578
579/// Calculates the deduplication ratio for a collection of blocks.
580///
581/// Returns a value between 0.0 and 1.0, where:
582/// - 1.0 means all blocks are unique (no deduplication)
583/// - 0.5 means 50% of blocks are unique (50% deduplication)
584///
585/// # Examples
586///
587/// ```rust
588/// use ipfrs_core::utils::{quick_block, deduplication_ratio};
589///
590/// let blocks = vec![
591///     quick_block(b"same").unwrap(),
592///     quick_block(b"same").unwrap(),
593///     quick_block(b"different").unwrap(),
594/// ];
595/// let ratio = deduplication_ratio(&blocks);
596/// assert!((ratio - 0.666).abs() < 0.01); // 2 unique out of 3 = ~0.666
597/// ```
598pub fn deduplication_ratio(blocks: &[Block]) -> f64 {
599    if blocks.is_empty() {
600        return 0.0;
601    }
602
603    let unique_cids: std::collections::HashSet<_> = blocks.iter().map(|b| b.cid()).collect();
604    unique_cids.len() as f64 / blocks.len() as f64
605}
606
607/// Counts the number of unique CIDs in a collection of blocks.
608///
609/// # Examples
610///
611/// ```rust
612/// use ipfrs_core::utils::{quick_block, count_unique_blocks};
613///
614/// let blocks = vec![
615///     quick_block(b"same").unwrap(),
616///     quick_block(b"same").unwrap(),
617///     quick_block(b"different").unwrap(),
618/// ];
619/// assert_eq!(count_unique_blocks(&blocks), 2);
620/// ```
621pub fn count_unique_blocks(blocks: &[Block]) -> usize {
622    let unique_cids: std::collections::HashSet<_> = blocks.iter().map(|b| b.cid()).collect();
623    unique_cids.len()
624}
625
626/// Calculates the total size of all blocks in bytes.
627///
628/// # Examples
629///
630/// ```rust
631/// use ipfrs_core::utils::{quick_block, total_blocks_size};
632///
633/// let blocks = vec![
634///     quick_block(b"data1").unwrap(),
635///     quick_block(b"data2").unwrap(),
636/// ];
637/// assert_eq!(total_blocks_size(&blocks), 10); // "data1" + "data2" = 10 bytes
638/// ```
639pub fn total_blocks_size(blocks: &[Block]) -> u64 {
640    blocks.iter().map(|b| b.size()).sum()
641}
642
643// ============================================================================
644// Compression Utilities
645// ============================================================================
646
647/// Compress a block's data with the specified algorithm and level
648///
649/// Returns a new compressed `Bytes` buffer.
650///
651/// # Example
652///
653/// ```rust
654/// use ipfrs_core::utils::{quick_block, compress_block_data, decompress_block_data};
655/// use ipfrs_core::CompressionAlgorithm;
656///
657/// let data = "Hello, World! ".repeat(100); // Use compressible data
658/// let block = quick_block(data.as_bytes()).unwrap();
659/// let compressed = compress_block_data(block.data(), CompressionAlgorithm::Zstd, 3).unwrap();
660/// let decompressed = decompress_block_data(&compressed, CompressionAlgorithm::Zstd).unwrap();
661/// assert_eq!(block.data(), &decompressed);
662/// ```
663pub fn compress_block_data(
664    data: &bytes::Bytes,
665    algorithm: crate::CompressionAlgorithm,
666    level: u8,
667) -> crate::Result<bytes::Bytes> {
668    crate::compress(data, algorithm, level)
669}
670
671/// Decompress block data that was previously compressed
672///
673/// # Example
674///
675/// ```rust
676/// use ipfrs_core::utils::{compress_block_data, decompress_block_data};
677/// use ipfrs_core::CompressionAlgorithm;
678/// use bytes::Bytes;
679///
680/// let data = Bytes::from_static(b"Hello, World!");
681/// let compressed = compress_block_data(&data, CompressionAlgorithm::Lz4, 3).unwrap();
682/// let decompressed = decompress_block_data(&compressed, CompressionAlgorithm::Lz4).unwrap();
683/// assert_eq!(data, decompressed);
684/// ```
685pub fn decompress_block_data(
686    compressed: &bytes::Bytes,
687    algorithm: crate::CompressionAlgorithm,
688) -> crate::Result<bytes::Bytes> {
689    crate::decompress(compressed, algorithm)
690}
691
692/// Estimate how much space would be saved by compressing the data
693///
694/// Returns a ratio between 0.0 and 1.0+ where lower is better.
695/// A ratio of 0.5 means the compressed data is 50% of the original size.
696///
697/// # Example
698///
699/// ```rust
700/// use ipfrs_core::utils::estimate_compression_savings;
701/// use ipfrs_core::CompressionAlgorithm;
702/// use bytes::Bytes;
703///
704/// let data = Bytes::from("a".repeat(1000)); // Highly compressible
705/// let savings = estimate_compression_savings(&data, CompressionAlgorithm::Zstd, 5).unwrap();
706/// assert!(savings < 0.2); // Should compress to less than 20% of original
707/// ```
708pub fn estimate_compression_savings(
709    data: &bytes::Bytes,
710    algorithm: crate::CompressionAlgorithm,
711    level: u8,
712) -> crate::Result<f64> {
713    crate::compression_ratio(data, algorithm, level)
714}
715
716/// Check if data is worth compressing based on size and estimated ratio
717///
718/// Returns `true` if compression would likely save significant space.
719/// Uses a threshold of 20% savings and minimum size of 1KB.
720///
721/// # Example
722///
723/// ```rust
724/// use ipfrs_core::utils::should_compress;
725/// use ipfrs_core::CompressionAlgorithm;
726/// use bytes::Bytes;
727///
728/// let small_data = Bytes::from_static(b"Hello"); // Too small
729/// assert!(!should_compress(&small_data, CompressionAlgorithm::Zstd, 3).unwrap());
730///
731/// let large_repetitive = Bytes::from("a".repeat(10000)); // Worth compressing
732/// assert!(should_compress(&large_repetitive, CompressionAlgorithm::Zstd, 3).unwrap());
733/// ```
734pub fn should_compress(
735    data: &bytes::Bytes,
736    algorithm: crate::CompressionAlgorithm,
737    level: u8,
738) -> crate::Result<bool> {
739    // Don't compress small data (overhead not worth it)
740    if data.len() < 1024 {
741        return Ok(false);
742    }
743
744    // Don't compress if algorithm is None
745    if algorithm == crate::CompressionAlgorithm::None {
746        return Ok(false);
747    }
748
749    // Check if we'd save at least 20%
750    let ratio = crate::compression_ratio(data, algorithm, level)?;
751    Ok(ratio < 0.8)
752}
753
754/// Get recommended compression algorithm based on use case
755///
756/// Returns `Zstd` for archival (best ratio) or `Lz4` for real-time (fastest).
757///
758/// # Example
759///
760/// ```rust
761/// use ipfrs_core::utils::recommended_compression;
762/// use ipfrs_core::CompressionAlgorithm;
763///
764/// let archival = recommended_compression(true);
765/// assert_eq!(archival, CompressionAlgorithm::Zstd);
766///
767/// let realtime = recommended_compression(false);
768/// assert_eq!(realtime, CompressionAlgorithm::Lz4);
769/// ```
770pub fn recommended_compression(prefer_ratio_over_speed: bool) -> crate::CompressionAlgorithm {
771    if prefer_ratio_over_speed {
772        crate::CompressionAlgorithm::Zstd // Best compression ratio
773    } else {
774        crate::CompressionAlgorithm::Lz4 // Fastest
775    }
776}
777
778#[cfg(test)]
779mod tests {
780    use super::*;
781
782    #[test]
783    fn test_quick_block() {
784        let block = quick_block(b"test data").unwrap();
785        assert_eq!(block.data().as_ref(), b"test data");
786    }
787
788    #[test]
789    fn test_block_with_hash() {
790        let block1 = block_with_hash(b"data", HashAlgorithm::Sha256).unwrap();
791        let block2 = block_with_hash(b"data", HashAlgorithm::Sha3_256).unwrap();
792        // Different hash algorithms produce different CIDs
793        assert_ne!(block1.cid(), block2.cid());
794    }
795
796    #[test]
797    fn test_cid_functions() {
798        let sha256 = sha256_cid(b"test").unwrap();
799        let sha3 = sha3_cid(b"test").unwrap();
800        assert_ne!(sha256, sha3);
801    }
802
803    #[test]
804    fn test_all_hash_algorithm_cid_functions() {
805        let data = b"test data for all hash algorithms";
806
807        // Test all 8 hash algorithms
808        let sha256 = sha256_cid(data).unwrap();
809        let sha512 = sha512_cid(data).unwrap();
810        let sha3_256 = sha3_cid(data).unwrap();
811        let sha3_512 = sha3_512_cid(data).unwrap();
812        let blake2b256 = blake2b256_cid(data).unwrap();
813        let blake2b512 = blake2b512_cid(data).unwrap();
814        let blake2s256 = blake2s256_cid(data).unwrap();
815        let blake3 = blake3_cid(data).unwrap();
816
817        // All should produce different CIDs
818        let cids = vec![
819            sha256, sha512, sha3_256, sha3_512, blake2b256, blake2b512, blake2s256, blake3,
820        ];
821
822        // Check uniqueness - each hash algorithm should produce different output
823        for i in 0..cids.len() {
824            for j in (i + 1)..cids.len() {
825                assert_ne!(cids[i], cids[j], "CID {} and {} should be different", i, j);
826            }
827        }
828    }
829
830    #[test]
831    fn test_hash_algorithm_determinism() {
832        let data = b"determinism test";
833
834        // Each algorithm should produce the same CID for the same data
835        assert_eq!(sha256_cid(data).unwrap(), sha256_cid(data).unwrap());
836        assert_eq!(sha512_cid(data).unwrap(), sha512_cid(data).unwrap());
837        assert_eq!(sha3_cid(data).unwrap(), sha3_cid(data).unwrap());
838        assert_eq!(sha3_512_cid(data).unwrap(), sha3_512_cid(data).unwrap());
839        assert_eq!(blake2b256_cid(data).unwrap(), blake2b256_cid(data).unwrap());
840        assert_eq!(blake2b512_cid(data).unwrap(), blake2b512_cid(data).unwrap());
841        assert_eq!(blake2s256_cid(data).unwrap(), blake2s256_cid(data).unwrap());
842        assert_eq!(blake3_cid(data).unwrap(), blake3_cid(data).unwrap());
843    }
844
845    #[test]
846    fn test_hash_algorithm_names_and_sizes() {
847        use crate::HashAlgorithm;
848
849        // Test name() method
850        assert_eq!(HashAlgorithm::Sha256.name(), "SHA2-256");
851        assert_eq!(HashAlgorithm::Sha512.name(), "SHA2-512");
852        assert_eq!(HashAlgorithm::Sha3_256.name(), "SHA3-256");
853        assert_eq!(HashAlgorithm::Sha3_512.name(), "SHA3-512");
854        assert_eq!(HashAlgorithm::Blake2b256.name(), "BLAKE2b-256");
855        assert_eq!(HashAlgorithm::Blake2b512.name(), "BLAKE2b-512");
856        assert_eq!(HashAlgorithm::Blake2s256.name(), "BLAKE2s-256");
857        assert_eq!(HashAlgorithm::Blake3.name(), "BLAKE3");
858
859        // Test hash_size() method
860        assert_eq!(HashAlgorithm::Sha256.hash_size(), 32);
861        assert_eq!(HashAlgorithm::Sha512.hash_size(), 64);
862        assert_eq!(HashAlgorithm::Sha3_256.hash_size(), 32);
863        assert_eq!(HashAlgorithm::Sha3_512.hash_size(), 64);
864        assert_eq!(HashAlgorithm::Blake2b256.hash_size(), 32);
865        assert_eq!(HashAlgorithm::Blake2b512.hash_size(), 64);
866        assert_eq!(HashAlgorithm::Blake2s256.hash_size(), 32);
867        assert_eq!(HashAlgorithm::Blake3.hash_size(), 32);
868    }
869
870    #[test]
871    fn test_hash_algorithm_all() {
872        use crate::HashAlgorithm;
873
874        let all = HashAlgorithm::all();
875        assert_eq!(all.len(), 8);
876
877        // Verify all algorithms are present
878        assert!(all.contains(&HashAlgorithm::Sha256));
879        assert!(all.contains(&HashAlgorithm::Sha512));
880        assert!(all.contains(&HashAlgorithm::Sha3_256));
881        assert!(all.contains(&HashAlgorithm::Sha3_512));
882        assert!(all.contains(&HashAlgorithm::Blake2b256));
883        assert!(all.contains(&HashAlgorithm::Blake2b512));
884        assert!(all.contains(&HashAlgorithm::Blake2s256));
885        assert!(all.contains(&HashAlgorithm::Blake3));
886    }
887
888    #[test]
889    fn test_blocks_equal() {
890        let block1 = quick_block(b"same").unwrap();
891        let block2 = quick_block(b"same").unwrap();
892        let block3 = quick_block(b"different").unwrap();
893
894        assert!(blocks_equal(&block1, &block2));
895        assert!(!blocks_equal(&block1, &block3));
896    }
897
898    #[test]
899    fn test_verify_block() {
900        let block = quick_block(b"verify me").unwrap();
901        assert!(verify_block(&block).unwrap());
902    }
903
904    #[test]
905    fn test_ipld_map() {
906        let map = ipld_map(vec![
907            ("key1", Ipld::String("value1".to_string())),
908            ("key2", Ipld::Integer(42)),
909        ]);
910
911        match map {
912            Ipld::Map(m) => {
913                assert_eq!(m.len(), 2);
914                assert!(m.contains_key("key1"));
915                assert!(m.contains_key("key2"));
916            }
917            _ => panic!("Expected map"),
918        }
919    }
920
921    #[test]
922    fn test_ipld_list() {
923        let list = ipld_list(vec![Ipld::Integer(1), Ipld::Integer(2), Ipld::Integer(3)]);
924
925        match list {
926            Ipld::List(l) => assert_eq!(l.len(), 3),
927            _ => panic!("Expected list"),
928        }
929    }
930
931    #[test]
932    fn test_ipld_cbor_roundtrip() {
933        let ipld = Ipld::String("test".to_string());
934        let cbor = ipld_to_cbor(&ipld).unwrap();
935        let decoded = ipld_from_cbor(&cbor).unwrap();
936        assert_eq!(ipld, decoded);
937    }
938
939    #[test]
940    fn test_ipld_json_roundtrip() {
941        let ipld = Ipld::String("test".to_string());
942        let json = ipld_to_json(&ipld).unwrap();
943        let decoded = ipld_from_json(&json).unwrap();
944        assert_eq!(ipld, decoded);
945    }
946
947    #[test]
948    fn test_format_size() {
949        assert_eq!(format_size(512), "512 B");
950        assert_eq!(format_size(1024), "1.00 KB");
951        assert_eq!(format_size(1_048_576), "1.00 MB");
952        assert_eq!(format_size(1_073_741_824), "1.00 GB");
953        assert_eq!(format_size(2_147_483_648), "2.00 GB");
954    }
955
956    #[test]
957    fn test_estimate_chunks() {
958        assert_eq!(estimate_chunks(100), 1);
959        assert_eq!(estimate_chunks(300_000), 2);
960        assert_eq!(estimate_chunks(1_000_000), 4);
961    }
962
963    #[test]
964    fn test_needs_chunking() {
965        assert!(!needs_chunking(100));
966        assert!(!needs_chunking(1_000_000));
967        assert!(!needs_chunking(2_000_000)); // 2MB < MAX_BLOCK_SIZE (2MiB)
968        assert!(needs_chunking(3_000_000)); // 3MB > MAX_BLOCK_SIZE
969        assert!(needs_chunking(10_000_000));
970    }
971
972    // Diagnostic and validation tests
973
974    #[test]
975    fn test_inspect_cid() {
976        let cid = sha256_cid(b"test").unwrap();
977        let info = inspect_cid(&cid);
978        assert_eq!(info.version, 1);
979        assert!(!info.cid_string.is_empty());
980        assert!(info.hash_length > 0);
981    }
982
983    #[test]
984    fn test_inspect_block() {
985        let block = quick_block(b"test data").unwrap();
986        let info = inspect_block(&block).unwrap();
987        assert!(info.is_valid);
988        assert_eq!(info.size, 9_u64);
989        assert!(!info.cid.is_empty());
990        assert!(!info.size_formatted.is_empty());
991    }
992
993    #[test]
994    fn test_cid_info_display() {
995        let cid = sha256_cid(b"test").unwrap();
996        let info = inspect_cid(&cid);
997        let display = format!("{}", info);
998        assert!(display.contains("CID:"));
999        assert!(display.contains("Version:"));
1000        assert!(display.contains("Codec:"));
1001    }
1002
/// The `Display` output of the value returned by `inspect_block` must mention
/// each of the expected field labels.
#[test]
fn test_block_info_display() {
    let subject = quick_block(b"test").unwrap();
    let rendered = inspect_block(&subject).unwrap().to_string();

    for label in ["Block:", "CID:", "Valid:"] {
        assert!(rendered.contains(label));
    }
}
1012
/// Three freshly built blocks must all be counted as valid.
#[test]
fn test_validate_blocks() {
    let payloads: [&[u8]; 3] = [b"data1", b"data2", b"data3"];
    let blocks: Vec<_> = payloads
        .iter()
        .map(|&bytes| quick_block(bytes).unwrap())
        .collect();

    // The result is a (valid, invalid) pair of counters.
    let tally = validate_blocks(&blocks).unwrap();
    assert_eq!(tally.0, 3);
    assert_eq!(tally.1, 0);
}
1025
/// Validating an empty collection must report zero valid and zero invalid blocks.
#[test]
fn test_validate_blocks_empty() {
    let nothing = Vec::<Block>::new();

    // The result is a (valid, invalid) pair of counters.
    let tally = validate_blocks(&nothing).unwrap();
    assert_eq!(tally.0, 0);
    assert_eq!(tally.1, 0);
}
1033
/// Freshly built blocks must yield an empty list of invalid entries.
#[test]
fn test_find_invalid_blocks() {
    let payloads: [&[u8]; 2] = [b"data1", b"data2"];
    let mut blocks = Vec::with_capacity(payloads.len());
    for bytes in payloads.iter() {
        blocks.push(quick_block(bytes).unwrap());
    }

    let offenders = find_invalid_blocks(&blocks).unwrap();
    assert_eq!(offenders.len(), 0);
}
1044
/// `measure_cid_generation` must hand back a timing value together with a
/// CID that renders to a non-empty string.
#[test]
fn test_measure_cid_generation() {
    let payload: &[u8] = b"test data";
    let (elapsed, generated) = measure_cid_generation(payload, HashAlgorithm::Sha256).unwrap();

    // NOTE(review): a strictly positive reading presumes the timer unit is fine
    // enough to register this small workload — confirm against the implementation.
    assert!(elapsed > 0);

    let rendered = generated.to_string();
    assert!(!rendered.is_empty());
}
1051
/// `measure_block_creation` must hand back a timing value together with a
/// block whose size matches the 9-byte payload.
#[test]
fn test_measure_block_creation() {
    let payload: &[u8] = b"test data";
    let (elapsed, created) = measure_block_creation(payload).unwrap();

    // NOTE(review): a strictly positive reading presumes the timer unit is fine
    // enough to register this small workload — confirm against the implementation.
    assert!(elapsed > 0);
    assert_eq!(created.size(), 9_u64);
}
1058
/// Two identical payloads plus one distinct payload: the ratio must be about 2/3.
#[test]
fn test_deduplication_ratio() {
    let payloads: [&[u8]; 3] = [b"same", b"same", b"different"];
    let blocks: Vec<_> = payloads
        .iter()
        .map(|&bytes| quick_block(bytes).unwrap())
        .collect();

    // 2 unique out of 3 total
    let deviation = (deduplication_ratio(&blocks) - 0.666).abs();
    assert!(deviation < 0.01);
}
1071
/// Three distinct payloads: the ratio must be exactly 1.0.
#[test]
fn test_deduplication_ratio_all_unique() {
    let blocks: Vec<_> = ["data1", "data2", "data3"]
        .iter()
        .map(|text| quick_block(text.as_bytes()).unwrap())
        .collect();

    let observed = deduplication_ratio(&blocks);
    assert_eq!(observed, 1.0);
}
1083
/// Three copies of the same payload: the ratio must be about 1/3.
#[test]
fn test_deduplication_ratio_all_same() {
    let blocks: Vec<_> = std::iter::repeat_with(|| quick_block(b"same").unwrap())
        .take(3)
        .collect();

    // 1 unique out of 3 total
    let deviation = (deduplication_ratio(&blocks) - 0.333).abs();
    assert!(deviation < 0.01);
}
1096
/// With no blocks at all the ratio must be exactly 0.0.
#[test]
fn test_deduplication_ratio_empty() {
    let nothing = Vec::<Block>::new();

    let observed = deduplication_ratio(&nothing);
    assert_eq!(observed, 0.0);
}
1103
/// Two identical payloads plus one distinct payload count as two unique blocks.
#[test]
fn test_count_unique_blocks() {
    let payloads: [&[u8]; 3] = [b"same", b"same", b"different"];
    let mut blocks = Vec::with_capacity(payloads.len());
    for bytes in payloads.iter() {
        blocks.push(quick_block(bytes).unwrap());
    }

    let distinct = count_unique_blocks(&blocks);
    assert_eq!(distinct, 2);
}
1114
/// Three distinct payloads count as three unique blocks.
#[test]
fn test_count_unique_blocks_all_unique() {
    let blocks: Vec<_> = ["a", "b", "c"]
        .iter()
        .map(|text| quick_block(text.as_bytes()).unwrap())
        .collect();

    let distinct = count_unique_blocks(&blocks);
    assert_eq!(distinct, 3);
}
1125
/// An empty collection contains zero unique blocks.
#[test]
fn test_count_unique_blocks_empty() {
    let nothing = Vec::<Block>::new();

    let distinct = count_unique_blocks(&nothing);
    assert_eq!(distinct, 0);
}
1131
/// Two 5-byte payloads must add up to a total size of 10.
#[test]
fn test_total_blocks_size() {
    // Each payload is 5 bytes long.
    let payloads: [&[u8]; 2] = [b"data1", b"data2"];
    let blocks: Vec<_> = payloads
        .iter()
        .map(|&bytes| quick_block(bytes).unwrap())
        .collect();

    let combined = total_blocks_size(&blocks);
    assert_eq!(combined, 10);
}
1141
/// An empty collection has a total size of 0.
#[test]
fn test_total_blocks_size_empty() {
    let nothing = Vec::<Block>::new();

    let combined = total_blocks_size(&nothing);
    assert_eq!(combined, 0);
}
1147
/// Round-trips repetitive data through Zstd compression and decompression.
#[test]
fn test_compress_block_data() {
    use crate::CompressionAlgorithm;

    let text = "Hello, World! ".repeat(100);
    let data = bytes::Bytes::from(text);

    // Third argument is 5 (presumably the compression level — confirm).
    let packed = compress_block_data(&data, CompressionAlgorithm::Zstd, 5).unwrap();

    // Repetitive input is expected to come out smaller than it went in.
    let (packed_len, plain_len) = (packed.len(), data.len());
    assert!(packed_len < plain_len);

    // Unpacking must reproduce the original bytes.
    let unpacked = decompress_block_data(&packed, CompressionAlgorithm::Zstd).unwrap();
    assert_eq!(data, unpacked);
}
1162
/// The estimate for 1000 repetitions of a single character must be below 0.1.
#[test]
fn test_estimate_compression_savings() {
    use crate::CompressionAlgorithm;

    let text = "a".repeat(1000);
    let data = bytes::Bytes::from(text);

    let estimate = estimate_compression_savings(&data, CompressionAlgorithm::Zstd, 5).unwrap();

    // Highly repetitive input should shrink dramatically.
    assert!(estimate < 0.1);
}
1173
/// Checks the `should_compress` decision for tiny data, large repetitive data
/// and the `None` algorithm.
#[test]
fn test_should_compress() {
    use crate::CompressionAlgorithm;

    let tiny = bytes::Bytes::from_static(b"Hello");
    let bulky = bytes::Bytes::from("a".repeat(10000));

    // A handful of bytes is not worth compressing.
    let tiny_verdict = should_compress(&tiny, CompressionAlgorithm::Zstd, 3).unwrap();
    assert!(!tiny_verdict);

    // A long repetitive buffer is.
    let bulky_verdict = should_compress(&bulky, CompressionAlgorithm::Zstd, 3).unwrap();
    assert!(bulky_verdict);

    // With the `None` algorithm the answer is always negative.
    let disabled_verdict = should_compress(&bulky, CompressionAlgorithm::None, 3).unwrap();
    assert!(!disabled_verdict);
}
1189
/// `recommended_compression` must map `true` to Zstd and `false` to Lz4.
#[test]
fn test_recommended_compression() {
    use crate::CompressionAlgorithm;

    // Each row is (flag passed in, algorithm expected back).
    let expectations = [
        (true, CompressionAlgorithm::Zstd),
        (false, CompressionAlgorithm::Lz4),
    ];

    for (flag, algorithm) in expectations {
        assert_eq!(recommended_compression(flag), algorithm);
    }
}
1197}