// hexz_core/ops/write.rs
1//! Low-level write operations for Hexz snapshots.
2//!
3//! This module provides the foundational building blocks for writing compressed,
4//! encrypted, and deduplicated blocks to snapshot files. These functions implement
5//! the core write semantics used by higher-level pack operations while remaining
6//! independent of the packing workflow.
7//!
8//! # Module Purpose
9//!
10//! The write operations module serves as the bridge between the high-level packing
11//! pipeline and the raw file I/O layer. It encapsulates the logic for:
12//!
13//! - **Block Writing**: Transform raw chunks into compressed, encrypted blocks
14//! - **Deduplication**: Detect and eliminate redundant blocks via content hashing
15//! - **Zero Optimization**: Handle sparse data efficiently without storage
16//! - **Metadata Generation**: Create BlockInfo descriptors for index building
17//!
18//! # Design Philosophy
19//!
20//! These functions are designed to be composable, stateless, and easily testable.
21//! They operate on raw byte buffers and writers without knowledge of the broader
22//! packing context (progress reporting, stream management, index organization).
23//!
24//! This separation enables:
25//! - Unit testing of write logic in isolation
26//! - Reuse in different packing strategies (single-stream, multi-threaded, streaming)
27//! - Clear separation of concerns (write vs. orchestration)
28//!
29//! # Write Operation Semantics
30//!
31//! ## Block Transformation Pipeline
32//!
33//! Each block undergoes a multi-stage transformation before being written:
34//!
35//! ```text
36//! Raw Chunk (input)
37//! ↓
38//! ┌────────────────┐
39//! │ Compression │ → Compress using LZ4 or Zstd
40//! └────────────────┘ (reduces size, increases CPU)
41//! ↓
42//! ┌────────────────┐
43//! │ Encryption │ → Optional AES-256-GCM with block_idx nonce
44//! └────────────────┘ (confidentiality + integrity)
45//! ↓
46//! ┌────────────────┐
47//! │ Checksum │ → CRC32 of final data (fast integrity check)
48//! └────────────────┘
49//! ↓
50//! ┌────────────────┐
51//! │ Deduplication │ → BLAKE3 hash lookup (skip write if duplicate)
52//! └────────────────┘ (disabled for encrypted data)
53//! ↓
54//! ┌────────────────┐
55//! │ Write │ → Append to output file at current offset
56//! └────────────────┘
57//! ↓
58//! BlockInfo (metadata: offset, length, checksum)
59//! ```
60//!
61//! ## Write Behavior and Atomicity
62//!
63//! ### Single Block Writes
64//!
65//! Individual block writes via [`write_block`] are atomic with respect to the
66//! underlying file system's write atomicity guarantees:
67//!
68//! - **Buffered writes**: Data passes through OS page cache
69//! - **No fsync**: Writes are not flushed to disk until the writer is closed
70//! - **Partial write handling**: Writer's `write_all` ensures complete writes or error
71//! - **Crash behavior**: Partial blocks may be written if process crashes mid-write
72//!
73//! ### Deduplication State
74//!
75//! The deduplication map is maintained externally (by the caller). This design allows:
76//! - **Flexibility**: Caller controls when/if to enable deduplication
77//! - **Memory control**: Map lifetime and size managed by orchestration layer
78//! - **Consistency**: Map updates are immediately visible to subsequent writes
79//!
80//! ### Offset Management
81//!
82//! The `current_offset` parameter is updated atomically after each successful write.
83//! This ensures:
84//! - **Sequential allocation**: Blocks are laid out contiguously in file
85//! - **No gaps**: Every byte between header and master index is utilized
86//! - **Predictable layout**: Physical offset increases monotonically
87//!
88//! ## Block Allocation Strategy
89//!
90//! Blocks are allocated sequentially in the order they are written:
91//!
92//! ```text
93//! File Layout:
94//! ┌──────────────┬──────────┬──────────┬──────────┬─────────────┐
95//! │ Header (512B)│ Block 0 │ Block 1 │ Block 2 │ Index Pages │
96//! └──────────────┴──────────┴──────────┴──────────┴─────────────┘
97//! ↑ ↑ ↑ ↑
98//! 0 512 512+len0 512+len0+len1
99//!
100//! current_offset advances after each write:
101//! - Initial: 512 (after header)
102//! - After Block 0: 512 + len0
103//! - After Block 1: 512 + len0 + len1
104//! - After Block 2: 512 + len0 + len1 + len2
105//! ```
106//!
107//! ### Deduplication Impact
108//!
109//! When deduplication detects a duplicate block:
110//! - **No physical write**: Block is not written to disk
111//! - **Offset reuse**: BlockInfo references the existing block's offset
112//! - **Space savings**: Multiple logical blocks share one physical block
113//! - **Transparency**: Readers cannot distinguish between deduplicated and unique blocks
114//!
115//! Example with deduplication:
116//!
117//! ```text
118//! Logical Blocks: [A, B, A, C, B]
119//! Physical Blocks: [A, B, C]
120//! ↑ ↑ ↑
121//! │ │ └─ Block 3 (unique)
122//! │ └─ Block 1 (unique)
123//! └─ Block 0 (unique)
124//!
125//! BlockInfo for logical block 2: offset = offset_of(A), length = len(A)
126//! BlockInfo for logical block 4: offset = offset_of(B), length = len(B)
127//! ```
128//!
129//! ## Buffer Management
130//!
131//! This module does not perform explicit buffer management. All buffers are:
132//!
133//! - **Caller-allocated**: Input chunks are provided by caller
134//! - **Temporary allocations**: Compression/encryption output is allocated, then consumed
//! - **No pooling**: Each operation allocates fresh buffers (memory is reclaimed when the temporaries are dropped)
136//!
137//! For high-performance scenarios, callers should consider:
138//! - Reusing chunk buffers across iterations
139//! - Using buffer pools for compression output (requires refactoring)
140//! - Batch writes to amortize allocation overhead
141//!
142//! ## Flush Behavior
143//!
144//! Functions in this module do NOT flush data to disk. Flushing is the caller's
145//! responsibility and typically occurs:
146//!
147//! - After writing all blocks and indices (in [`pack_snapshot`](crate::ops::pack::pack_snapshot))
148//! - Before closing the output file
149//! - Never during block writing (to maximize write batching)
150//!
151//! This design allows the OS to batch writes for optimal I/O performance.
152//!
153//! # Error Handling and Recovery
154//!
155//! ## Error Categories
156//!
157//! Write operations can fail for several reasons:
158//!
159//! ### I/O Errors
160//!
161//! - **Disk full**: No space for compressed block (`ENOSPC`)
162//! - **Permission denied**: Writer lacks write permission (`EACCES`)
163//! - **Device error**: Hardware failure, I/O timeout (`EIO`)
164//!
165//! These surface as `Error::Io` wrapping the underlying `std::io::Error`.
166//!
167//! ### Compression Errors
168//!
169//! - **Compression failure**: Compressor returns error (rare, usually indicates bug)
170//! - **Incompressible data**: Not an error; stored with expansion
171//!
172//! These surface as `Error::Compression`.
173//!
174//! ### Encryption Errors
175//!
176//! - **Cipher initialization failure**: Invalid state (should not occur in practice)
177//! - **Encryption failure**: Crypto operation fails (indicates library bug)
178//!
179//! These surface as `Error::Encryption`.
180//!
181//! ## Error Recovery
182//!
183//! Write operations provide **no automatic recovery**. On error:
184//!
185//! - **Function returns immediately**: No cleanup or rollback
186//! - **File state undefined**: Partial data may be written
187//! - **Caller responsibility**: Must handle error and clean up
188//!
189//! Typical error handling pattern in pack operations:
190//!
191//! ```text
192//! match write_block_simple(...) {
193//! Ok(info) => {
194//! // Success: Add info to index, continue
195//! }
196//! Err(e) => {
197//! // Failure: Log error, delete partial output file, return error to caller
198//! std::fs::remove_file(output)?;
199//! return Err(e);
200//! }
201//! }
202//! ```
203//!
204//! ## Partial Write Handling
205//!
206//! The underlying `Write::write_all` method ensures atomic writes of complete blocks:
207//!
208//! - **Success**: Entire block written, offset updated
209//! - **Failure**: Partial write may occur, but error is returned
210//! - **No retry**: Caller must handle retries if desired
211//!
212//! # Performance Characteristics
213//!
214//! ## Write Throughput
215//!
216//! Block write performance is dominated by compression:
217//!
218//! - **LZ4**: ~2 GB/s (minimal overhead)
219//! - **Zstd level 3**: ~200-500 MB/s (depends on data)
220//! - **Encryption**: ~1-2 GB/s (hardware AES-NI)
221//! - **BLAKE3 hashing**: ~3200 MB/s (for deduplication)
222//!
223//! Typical bottleneck: Compression CPU time.
224//!
225//! ## Deduplication Overhead
226//!
227//! BLAKE3 hashing adds ~5-10% overhead to write operations:
228//!
229//! - **Hash computation**: ~3200 MB/s throughput (BLAKE3 tree-hashed)
230//! - **HashMap lookup**: O(1) average, ~50-100 ns per lookup
231//! - **Memory usage**: ~48 bytes per unique block
232//!
233//! For datasets with <10% duplication, deduplication overhead may exceed savings.
234//! Consider disabling dedup for unique data.
235//!
236//! ## Zero Block Detection
237//!
238//! [`is_zero_chunk`] uses SIMD-optimized comparison on modern CPUs:
239//!
240//! - **Throughput**: ~10-20 GB/s (memory bandwidth limited)
241//! - **Overhead**: Negligible (~5-10 cycles per 64-byte cache line)
242//!
243//! Zero detection is always worth enabling for sparse data.
244//!
245//! # Memory Usage
246//!
247//! Per-block memory allocation:
248//!
249//! - **Input chunk**: Caller-provided (typically 64 KiB)
250//! - **Compression output**: ~1.5× chunk size worst case (incompressible data)
251//! - **Encryption output**: compression_size + 28 bytes (AES-GCM overhead)
252//! - **Dedup hash**: 32 bytes (BLAKE3 digest)
253//!
254//! Total temporary allocation per write: ~100-150 KiB (released immediately after write).
255//!
256//! # Examples
257//!
258//! See individual function documentation for usage examples.
259//!
260//! # Future Enhancements
261//!
262//! Potential improvements to write operations:
263//!
264//! - **Buffer pooling**: Reuse compression/encryption buffers to reduce allocation overhead
265//! - **Async I/O**: Use `tokio` or `io_uring` for overlapped writes
266//! - **Parallel writes**: Write multiple blocks concurrently (requires coordination)
267//! - **Write-ahead logging**: Enable atomic commits for crash safety
268
269use hexz_common::Result;
270use std::io::Write;
271
272use crate::algo::compression::Compressor;
273use crate::algo::dedup::hash_table::DedupHashTable;
274use crate::algo::encryption::Encryptor;
275use crate::algo::hashing::ContentHasher;
276use crate::format::index::BlockInfo;
277
278/// Writes a compressed and optionally encrypted block to the output stream.
279///
280/// This function implements the complete block transformation pipeline: compression,
281/// optional encryption, checksum computation, deduplication, and physical write.
282/// It returns a `BlockInfo` descriptor suitable for inclusion in an index page.
283///
284/// # Transformation Pipeline
285///
286/// 1. **Compression**: Compress raw chunk using provided compressor (LZ4 or Zstd)
287/// 2. **Encryption** (optional): Encrypt compressed data with AES-256-GCM using block_idx as nonce
288/// 3. **Checksum**: Compute CRC32 of final data for integrity verification
289/// 4. **Deduplication** (optional, not for encrypted):
290/// - Compute BLAKE3 hash of final data
291/// - Check dedup_map for existing block with same hash
292/// - If found: Reuse existing offset, skip write
293/// - If new: Write block, record offset in dedup_map
294/// 5. **Write**: Append final data to output at current_offset
295/// 6. **Metadata**: Create and return BlockInfo with offset, length, checksum
296///
297/// # Parameters
298///
299/// - `out`: Output writer implementing `Write` trait
300/// - Typically a `File` or `BufWriter<File>`
301/// - Must support `write_all` for atomic block writes
302///
303/// - `chunk`: Uncompressed chunk data (raw bytes)
304/// - Typical size: 16 KiB - 256 KiB (configurable)
305/// - Must not be empty (undefined behavior for zero-length chunks)
306///
307/// - `block_idx`: Global block index (zero-based)
308/// - Used as encryption nonce (must be unique per snapshot)
309/// - Monotonically increases across all streams
310/// - Must not reuse indices within same encrypted snapshot (breaks security)
311///
312/// - `current_offset`: Mutable reference to current physical file offset
313/// - Updated after successful write: `*current_offset += bytes_written`
314/// - Not updated on error (file state undefined)
315/// - Not updated for deduplicated blocks (reuses existing offset)
316///
317/// - `dedup_map`: Optional deduplication hash table
318/// - `Some(&mut map)`: Enable dedup, use this map
319/// - `None`: Disable dedup, always write
320/// - Ignored if `encryptor.is_some()` (encryption prevents dedup)
321/// - Maps BLAKE3 hash → physical offset of first occurrence
322///
323/// - `compressor`: Compression algorithm implementation
324/// - Typically `Lz4Compressor` or `ZstdCompressor`
325/// - Must implement [`Compressor`] trait
326///
327/// - `encryptor`: Optional encryption implementation
328/// - `Some(enc)`: Encrypt compressed data with AES-256-GCM
329/// - `None`: Store compressed data unencrypted
330/// - Must implement [`Encryptor`] trait
331///
332/// - `hasher`: Content hasher for deduplication
333/// - Typically `Blake3Hasher`
334/// - Must implement [`ContentHasher`] trait
335/// - Used only when dedup_map is Some and encryptor is None
336///
337/// - `hash_buf`: Reusable buffer for hash output (must be ≥32 bytes)
338/// - Avoids allocation on every hash computation
339/// - Only used when dedup is enabled
340///
341/// # Returns
342///
343/// - `Ok(BlockInfo)`: Block written successfully, metadata returned
344/// - `offset`: Physical byte offset where block starts
345/// - `length`: Compressed (and encrypted) size in bytes
346/// - `logical_len`: Original uncompressed size
347/// - `checksum`: CRC32 of final data (compressed + encrypted)
348///
349/// - `Err(Error::Io)`: I/O error during write
350/// - Disk full, permission denied, device error
351/// - File state undefined (partial write may have occurred)
352///
353/// - `Err(Error::Compression)`: Compression failed
354/// - Rare; usually indicates library bug or corrupted input
355///
356/// - `Err(Error::Encryption)`: Encryption failed
357/// - Rare; usually indicates crypto library bug
358///
359/// # Examples
360///
361/// ## Basic Usage (No Encryption, No Dedup)
362///
363/// ```no_run
364/// use hexz_core::ops::write::write_block;
365/// use hexz_core::algo::compression::Lz4Compressor;
366/// use hexz_core::algo::hashing::blake3::Blake3Hasher;
367/// use std::collections::HashMap;
368/// use std::fs::File;
369///
370/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
371/// let mut out = File::create("output.hxz")?;
372/// let mut offset = 512u64; // After header
373/// let chunk = vec![0x42; 65536]; // 64 KiB of data
374/// let compressor = Lz4Compressor::new();
375/// let hasher = Blake3Hasher;
376/// let mut hash_buf = [0u8; 32];
377///
378/// let info = write_block(
379/// &mut out,
380/// &chunk,
381/// 0, // block_idx
382/// &mut offset,
383/// None::<&mut HashMap<[u8; 32], u64>>, // No dedup
384/// &compressor,
385/// None, // No encryption
386/// &hasher,
387/// &mut hash_buf,
388/// )?;
389///
390/// println!("Block written at offset {}, size {}", info.offset, info.length);
391/// # Ok(())
392/// # }
393/// ```
394///
395/// ## With Deduplication
396///
397/// ```no_run
398/// use hexz_core::ops::write::write_block;
399/// use hexz_core::algo::compression::Lz4Compressor;
400/// use hexz_core::algo::hashing::blake3::Blake3Hasher;
401/// use std::collections::HashMap;
402/// use std::fs::File;
403///
404/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
405/// let mut out = File::create("output.hxz")?;
406/// let mut offset = 512u64;
407/// let mut dedup_map: HashMap<[u8; 32], u64> = HashMap::new();
408/// let compressor = Lz4Compressor::new();
409/// let hasher = Blake3Hasher;
410/// let mut hash_buf = [0u8; 32];
411///
412/// // Write first block
413/// let chunk1 = vec![0xAA; 65536];
414/// let info1 = write_block(
415/// &mut out,
416/// &chunk1,
417/// 0,
418/// &mut offset,
419/// Some(&mut dedup_map),
420/// &compressor,
421/// None,
422/// &hasher,
423/// &mut hash_buf,
424/// )?;
425/// println!("Block 0: offset={}, written", info1.offset);
426///
427/// // Write duplicate block (same content)
428/// let chunk2 = vec![0xAA; 65536];
429/// let info2 = write_block(
430/// &mut out,
431/// &chunk2,
432/// 1,
433/// &mut offset,
434/// Some(&mut dedup_map),
435/// &compressor,
436/// None,
437/// &hasher,
438/// &mut hash_buf,
439/// )?;
440/// println!("Block 1: offset={}, deduplicated (no write)", info2.offset);
441/// assert_eq!(info1.offset, info2.offset); // Same offset, block reused
442/// # Ok(())
443/// # }
444/// ```
445///
446/// ## With Encryption
447///
448/// ```no_run
449/// use hexz_core::ops::write::write_block;
450/// use hexz_core::algo::compression::Lz4Compressor;
451/// use hexz_core::algo::encryption::AesGcmEncryptor;
452/// use hexz_core::algo::hashing::blake3::Blake3Hasher;
453/// use hexz_common::crypto::KeyDerivationParams;
454/// use std::collections::HashMap;
455/// use std::fs::File;
456///
457/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
458/// let mut out = File::create("output.hxz")?;
459/// let mut offset = 512u64;
460/// let compressor = Lz4Compressor::new();
461/// let hasher = Blake3Hasher;
462/// let mut hash_buf = [0u8; 32];
463///
464/// // Initialize encryptor
465/// let params = KeyDerivationParams::default();
466/// let encryptor = AesGcmEncryptor::new(
467/// b"strong_password",
468/// ¶ms.salt,
469/// params.iterations,
470/// )?;
471///
472/// let chunk = vec![0x42; 65536];
473/// let info = write_block(
474/// &mut out,
475/// &chunk,
476/// 0,
477/// &mut offset,
478/// None::<&mut HashMap<[u8; 32], u64>>, // Dedup disabled (encryption prevents it)
479/// &compressor,
480/// Some(&encryptor),
481/// &hasher,
482/// &mut hash_buf,
483/// )?;
484///
485/// println!("Encrypted block: offset={}, length={}", info.offset, info.length);
486/// # Ok(())
487/// # }
488/// ```
489///
490/// # Performance
491///
492/// - **Compression**: Dominates runtime (~2 GB/s LZ4, ~500 MB/s Zstd)
493/// - **Encryption**: ~1-2 GB/s (hardware AES-NI)
494/// - **Hashing**: ~3200 MB/s (BLAKE3 for dedup)
495/// - **I/O**: Typically not bottleneck (buffered writes, ~3 GB/s sequential)
496///
497/// # Deduplication Effectiveness
498///
499/// Deduplication is most effective when:
500/// - **Fixed-size blocks**: Same content → same boundaries → same hash
501/// - **Unencrypted**: Encryption produces unique ciphertext per block (different nonces)
502/// - **Redundant data**: Duplicate files, repeated patterns, copy-on-write filesystems
503///
504/// Deduplication is ineffective when:
505/// - **Content-defined chunking**: Small shifts cause different boundaries
506/// - **Compressed input**: Pre-compressed data has low redundancy
507/// - **Unique data**: No duplicate blocks to detect
508///
509/// # Security Considerations
510///
511/// ## Block Index as Nonce
512///
513/// When encrypting, `block_idx` is used as part of the AES-GCM nonce. **CRITICAL**:
514/// - Never reuse `block_idx` values within the same encrypted snapshot
515/// - Nonce reuse breaks AES-GCM security (allows plaintext recovery)
516/// - Each logical block must have a unique index
517///
518/// ## Deduplication and Encryption
519///
520/// Deduplication is automatically disabled when encrypting because:
521/// - Each block has a unique nonce → unique ciphertext
522/// - BLAKE3(ciphertext1) ≠ BLAKE3(ciphertext2) even if plaintext is identical
523/// - Attempting dedup with encryption wastes CPU (hashing) without space savings
524///
525/// # Thread Safety
526///
527/// This function is **not thread-safe** with respect to the output writer:
528/// - Concurrent calls with the same `out` writer will interleave writes (corruption)
529/// - Concurrent calls with different writers to the same file will corrupt file
530///
531/// For parallel writing, use separate output files or implement external synchronization.
532///
533/// The dedup_map must also be externally synchronized for concurrent access.
534#[allow(clippy::too_many_arguments)]
535pub fn write_block<W: Write, D: DedupHashTable>(
536 out: &mut W,
537 chunk: &[u8],
538 block_idx: u64,
539 current_offset: &mut u64,
540 dedup_map: Option<&mut D>,
541 compressor: &dyn Compressor,
542 encryptor: Option<&dyn Encryptor>,
543 hasher: &dyn ContentHasher,
544 hash_buf: &mut [u8; 32],
545) -> Result<BlockInfo> {
546 // Compress the chunk
547 let compressed = compressor.compress(chunk)?;
548
549 // Encrypt if requested
550 let final_data = if let Some(enc) = encryptor {
551 enc.encrypt(&compressed, block_idx)?
552 } else {
553 compressed
554 };
555
556 let checksum = crc32fast::hash(&final_data);
557 let chunk_len = chunk.len() as u32;
558
559 // Handle deduplication (only if not encrypting)
560 let offset = if encryptor.is_some() {
561 // No dedup for encrypted data
562 let off = *current_offset;
563 out.write_all(&final_data)?;
564 *current_offset += final_data.len() as u64;
565 off
566 } else if let Some(map) = dedup_map {
567 // Hash directly into the fixed-size buffer (no runtime bounds check).
568 *hash_buf = hasher.hash_fixed(&final_data);
569
570 if let Some(existing_offset) = map.get(hash_buf) {
571 // Block already exists, reuse it — no copy needed on hit
572 existing_offset
573 } else {
574 // New block: copy hash_buf only on miss (insert needs owned key)
575 let off = *current_offset;
576 map.insert(*hash_buf, off);
577 out.write_all(&final_data)?;
578 *current_offset += final_data.len() as u64;
579 off
580 }
581 } else {
582 // No dedup, just write
583 let off = *current_offset;
584 out.write_all(&final_data)?;
585 *current_offset += final_data.len() as u64;
586 off
587 };
588
589 Ok(BlockInfo {
590 offset,
591 length: final_data.len() as u32,
592 logical_len: chunk_len,
593 checksum,
594 })
595}
596
597/// Creates a zero-block descriptor without writing data to disk.
598///
599/// Zero blocks (all-zero chunks) are a special case optimized for space efficiency.
600/// Instead of compressing and storing zeros, we create a metadata-only descriptor
601/// that signals to the reader to return zeros without performing any I/O.
602///
603/// # Sparse Data Optimization
604///
605/// Many VM disk images and memory dumps contain large regions of zeros:
606/// - **Unallocated disk space**: File systems often zero-initialize blocks
607/// - **Memory pages**: Unused or zero-initialized memory
608/// - **Sparse files**: Holes in sparse file systems
609///
610/// Storing these zeros (even compressed) wastes space:
611/// - **LZ4-compressed zeros**: ~100 bytes per 64 KiB block (~0.15% of original)
612/// - **Uncompressed zeros**: 64 KiB per block (100%)
613/// - **Metadata-only**: 20 bytes per block (~0.03%)
614///
615/// The metadata approach saves 99.97% of space for zero blocks.
616///
617/// # Descriptor Format
618///
619/// Zero blocks are identified by a special BlockInfo signature:
620/// - `offset = 0`: Invalid physical offset (data region starts at ≥512)
621/// - `length = 0`: No physical storage
622/// - `logical_len = N`: Original zero block size in bytes
623/// - `checksum = 0`: No checksum needed (zeros are deterministic)
624///
625/// Readers recognize this pattern and synthesize zeros without I/O.
626///
627/// # Parameters
628///
629/// - `logical_len`: Size of the zero block in bytes
630/// - Typically matches block_size (e.g., 65536 for 64 KiB blocks)
631/// - Can vary with content-defined chunking
632/// - Must be > 0 (zero-length blocks are invalid)
633///
634/// # Returns
635///
636/// `BlockInfo` descriptor with zero-block semantics:
637/// - `offset = 0`
638/// - `length = 0`
639/// - `logical_len = logical_len`
640/// - `checksum = 0`
641///
642/// # Examples
643///
644/// ## Detecting and Creating Zero Blocks
645///
646/// ```
647/// use hexz_core::ops::write::{is_zero_chunk, create_zero_block};
648/// use hexz_core::format::index::BlockInfo;
649///
650/// let chunk = vec![0u8; 65536]; // 64 KiB of zeros
651///
652/// if is_zero_chunk(&chunk) {
653/// let info = create_zero_block(chunk.len() as u32);
654/// assert_eq!(info.offset, 0);
655/// assert_eq!(info.length, 0);
656/// assert_eq!(info.logical_len, 65536);
657/// println!("Zero block: No storage required!");
658/// }
659/// ```
660///
661/// ## Usage in Packing Loop
662///
663/// ```no_run
664/// # use hexz_core::ops::write::{is_zero_chunk, create_zero_block, write_block};
665/// # use hexz_core::algo::compression::Lz4Compressor;
666/// # use hexz_core::algo::hashing::blake3::Blake3Hasher;
667/// # use std::collections::HashMap;
668/// # use std::fs::File;
669/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
670/// # let mut out = File::create("output.hxz")?;
671/// # let mut offset = 512u64;
672/// # let compressor = Lz4Compressor::new();
673/// # let hasher = Blake3Hasher;
674/// # let mut hash_buf = [0u8; 32];
675/// # let chunks: Vec<Vec<u8>> = vec![];
676/// for (idx, chunk) in chunks.iter().enumerate() {
677/// let info = if is_zero_chunk(chunk) {
678/// // Optimize: No compression, no write
679/// create_zero_block(chunk.len() as u32)
680/// } else {
681/// // Normal path: Compress and write
682/// write_block(&mut out, chunk, idx as u64, &mut offset, None::<&mut HashMap<[u8; 32], u64>>, &compressor, None, &hasher, &mut hash_buf)?
683/// };
684/// // Add info to index page...
685/// }
686/// # Ok(())
687/// # }
688/// ```
689///
690/// # Performance
691///
692/// - **Time complexity**: O(1) (no I/O, no computation)
693/// - **Space complexity**: O(1) (fixed-size struct)
694/// - **Typical savings**: 99.97% vs. compressed zeros
695///
696/// # Reader Behavior
697///
698/// When a reader encounters a zero block (offset=0, length=0):
699/// 1. Recognize zero-block pattern from metadata
700/// 2. Allocate buffer of size `logical_len`
701/// 3. Fill buffer with zeros (optimized memset)
702/// 4. Return buffer to caller
703///
704/// No decompression, decryption, or checksum verification is performed.
705///
706/// # Interaction with Deduplication
707///
708/// Zero blocks do not participate in deduplication:
709/// - They are never written to disk → no physical offset → no dedup entry
710/// - Each zero block gets its own metadata descriptor
711/// - This is fine: Metadata is cheap (20 bytes), and all zero blocks have same content
712///
713/// # Interaction with Encryption
714///
715/// Zero blocks work correctly with encryption:
716/// - They are detected **before** compression/encryption
717/// - Encrypted snapshots still use zero-block optimization
718/// - Readers synthesize zeros without decryption
719///
720/// This is safe because zeros are public information (no confidentiality lost).
721///
722/// # Validation
723///
724/// **IMPORTANT**: This function does NOT validate that the original chunk was actually
725/// all zeros. The caller is responsible for calling [`is_zero_chunk`] first.
726///
727/// If a non-zero chunk is incorrectly marked as a zero block, readers will return
728/// zeros instead of the original data (silent data corruption).
729pub fn create_zero_block(logical_len: u32) -> BlockInfo {
730 BlockInfo {
731 offset: 0,
732 length: 0,
733 logical_len,
734 checksum: 0,
735 }
736}
737
738/// Convenience wrapper for `write_block` that allocates hasher and buffer internally.
739///
740/// This is a simpler API for tests and one-off writes. For hot paths (like snapshot
741/// packing loops), use `write_block` directly with a reused hasher and buffer.
742#[allow(dead_code)]
743fn write_block_simple<W: Write, D: DedupHashTable>(
744 out: &mut W,
745 chunk: &[u8],
746 block_idx: u64,
747 current_offset: &mut u64,
748 dedup_map: Option<&mut D>,
749 compressor: &dyn Compressor,
750 encryptor: Option<&dyn Encryptor>,
751) -> Result<BlockInfo> {
752 use crate::algo::hashing::blake3::Blake3Hasher;
753 let hasher = Blake3Hasher;
754 let mut hash_buf = [0u8; 32];
755 write_block(
756 out,
757 chunk,
758 block_idx,
759 current_offset,
760 dedup_map,
761 compressor,
762 encryptor,
763 &hasher,
764 &mut hash_buf,
765 )
766}
767
768/// Checks if a chunk consists entirely of zero bytes.
769///
770/// This function efficiently detects all-zero chunks to enable sparse block optimization.
771/// Zero chunks are common in VM images (unallocated space), memory dumps (zero-initialized
772/// pages), and sparse files.
773///
774/// # Algorithm
775///
776/// Uses Rust's iterator `all()` combinator, which:
777/// - Short-circuits on first non-zero byte (early exit)
778/// - Compiles to SIMD instructions on modern CPUs (autovectorization)
779/// - Typically processes 16-32 bytes per instruction (AVX2/AVX-512)
780///
781/// # Parameters
782///
783/// - `chunk`: Byte slice to check
784/// - Empty slices return `true` (vacuous truth)
785/// - Typical size: 16 KiB - 256 KiB (configurable block size)
786///
787/// # Returns
788///
789/// - `true`: All bytes are zero (sparse block, use [`create_zero_block`])
790/// - `false`: At least one non-zero byte (normal block, compress and write)
791///
792/// # Performance
793///
794/// Modern CPUs with SIMD support achieve excellent throughput:
795///
796/// - **SIMD-optimized**: ~10-20 GB/s (memory bandwidth limited)
797/// - **Scalar fallback**: ~1-2 GB/s (without SIMD)
798/// - **Typical overhead**: <1% of total packing time
799///
800/// The check is always worth performing given the massive space savings for zero blocks.
801///
802/// # Examples
803///
804/// ## Basic Usage
805///
806/// ```
807/// use hexz_core::ops::write::is_zero_chunk;
808///
809/// let zeros = vec![0u8; 65536];
810/// assert!(is_zero_chunk(&zeros));
811///
812/// let data = vec![0u8, 1u8, 0u8];
813/// assert!(!is_zero_chunk(&data));
814///
815/// let empty: &[u8] = &[];
816/// assert!(is_zero_chunk(empty)); // Empty is considered "all zeros"
817/// ```
818///
819/// ## Packing Loop Integration
820///
821/// ```no_run
822/// # use hexz_core::ops::write::{is_zero_chunk, create_zero_block, write_block};
823/// # use hexz_core::algo::compression::Lz4Compressor;
824/// # use hexz_core::algo::hashing::blake3::Blake3Hasher;
825/// # use hexz_core::format::index::BlockInfo;
826/// # use std::collections::HashMap;
827/// # use std::fs::File;
828/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
829/// # let mut out = File::create("output.hxz")?;
830/// # let mut offset = 512u64;
831/// # let compressor = Lz4Compressor::new();
832/// # let hasher = Blake3Hasher;
833/// # let mut hash_buf = [0u8; 32];
834/// # let mut index_blocks = Vec::new();
835/// # let chunks: Vec<Vec<u8>> = vec![];
836/// for (idx, chunk) in chunks.iter().enumerate() {
837/// let info = if is_zero_chunk(chunk) {
838/// // Fast path: No compression, no write, just metadata
839/// create_zero_block(chunk.len() as u32)
840/// } else {
841/// // Slow path: Compress, write, create metadata
842/// write_block(&mut out, chunk, idx as u64, &mut offset, None::<&mut HashMap<[u8; 32], u64>>, &compressor, None, &hasher, &mut hash_buf)?
843/// };
844/// index_blocks.push(info);
845/// }
846/// # Ok(())
847/// # }
848/// ```
849///
850/// ## Benchmarking Zero Detection
851///
852/// ```
853/// use hexz_core::ops::write::is_zero_chunk;
854/// use std::time::Instant;
855///
856/// let chunk = vec![0u8; 64 * 1024 * 1024]; // 64 MiB
857/// let start = Instant::now();
858///
859/// for _ in 0..100 {
860/// let _ = is_zero_chunk(&chunk);
861/// }
862///
863/// let elapsed = start.elapsed();
864/// let throughput = (64.0 * 100.0) / elapsed.as_secs_f64(); // MB/s
865/// println!("Zero detection: {:.1} GB/s", throughput / 1024.0);
866/// ```
867///
868/// # SIMD Optimization
869///
870/// On x86-64 with AVX2, the compiler typically generates code like:
871///
872/// ```text
873/// vpxor ymm0, ymm0, ymm0 ; Zero register
874/// loop:
875/// vmovdqu ymm1, [rsi] ; Load 32 bytes
876/// vpcmpeqb ymm2, ymm1, ymm0 ; Compare with zero
877/// vpmovmskb eax, ymm2 ; Extract comparison mask
878/// cmp eax, 0xFFFFFFFF ; All zeros?
879/// jne found_nonzero ; Early exit if not
880/// add rsi, 32 ; Advance pointer
881/// loop
882/// ```
883///
884/// This processes 32 bytes per iteration (~1-2 cycles on modern CPUs).
885///
886/// # Edge Cases
887///
888/// - **Empty chunks**: Return `true` (vacuous truth, no non-zero bytes)
889/// - **Single byte**: Works correctly, no special handling needed
890/// - **Unaligned chunks**: SIMD code handles unaligned loads transparently
891///
892/// # Alternative Implementations
893///
894/// Other possible implementations (not currently used):
895///
896/// 1. **Manual SIMD**: Use `std::arch` for explicit SIMD (faster but less portable)
897/// 2. **Chunked comparison**: Process in 8-byte chunks with `u64` casts (faster scalar)
898/// 3. **Bitmap scan**: Use CPU's `bsf`/`tzcnt` to skip zero regions (complex)
899///
900/// Current implementation relies on compiler autovectorization, which works well
901/// in practice and maintains portability.
902///
903/// # Correctness
904///
905/// This function is pure and infallible:
906/// - No side effects (read-only operation)
907/// - No panics (iterator `all()` is safe for all inputs)
908/// - No undefined behavior (all byte patterns are valid)
pub fn is_zero_chunk(chunk: &[u8]) -> bool {
    // Equivalent to `iter().all(|&b| b == 0)`: true iff no byte is non-zero,
    // which is vacuously true for an empty slice.
    !chunk.iter().any(|&byte| byte != 0)
}
912
#[cfg(test)]
mod tests {
    use super::*;
    use crate::algo::compression::{Lz4Compressor, ZstdCompressor};
    use crate::algo::encryption::AesGcmEncryptor;
    use std::collections::HashMap;
    use std::io::Cursor;

    /// Forwards to `write_block_simple` with deduplication turned off.
    fn write_block_no_dedup<W: Write>(
        out: &mut W,
        chunk: &[u8],
        block_idx: u64,
        current_offset: &mut u64,
        compressor: &dyn Compressor,
        encryptor: Option<&dyn Encryptor>,
    ) -> Result<BlockInfo> {
        let no_map: Option<&mut HashMap<[u8; 32], u64>> = None;
        write_block_simple(out, chunk, block_idx, current_offset, no_map, compressor, encryptor)
    }

    #[test]
    fn test_is_zero_chunk_all_zeros() {
        assert!(is_zero_chunk(&[0u8; 1024]));
    }

    #[test]
    fn test_is_zero_chunk_with_nonzero() {
        let mut data = [0u8; 1024];
        data[512] = 1; // a single set byte defeats the zero check
        assert!(!is_zero_chunk(&data));
    }

    #[test]
    fn test_is_zero_chunk_all_nonzero() {
        assert!(!is_zero_chunk(&[0xFFu8; 1024]));
    }

    #[test]
    fn test_is_zero_chunk_empty() {
        // An empty slice contains no non-zero bytes, so it counts as zero.
        assert!(is_zero_chunk(&[]));
    }

    #[test]
    fn test_is_zero_chunk_single_zero() {
        assert!(is_zero_chunk(&[0u8]));
    }

    #[test]
    fn test_is_zero_chunk_single_nonzero() {
        assert!(!is_zero_chunk(&[1u8]));
    }

    #[test]
    fn test_create_zero_block() {
        let info = create_zero_block(65536);
        // Zero blocks occupy no physical space and carry no checksum.
        assert_eq!(info.offset, 0);
        assert_eq!(info.length, 0);
        assert_eq!(info.checksum, 0);
        assert_eq!(info.logical_len, 65536);
    }

    #[test]
    fn test_create_zero_block_various_sizes() {
        for &size in &[1, 16, 1024, 4096, 65536, 1048576] {
            let info = create_zero_block(size);
            assert_eq!(info.logical_len, size);
            assert_eq!(info.offset, 0);
            assert_eq!(info.length, 0);
            assert_eq!(info.checksum, 0);
        }
    }

    #[test]
    fn test_write_block_basic_lz4() {
        let mut sink = Cursor::new(Vec::new());
        let mut pos = 512u64; // data section begins after the header
        let payload = vec![0xAAu8; 4096];
        let lz4 = Lz4Compressor::new();

        let info = write_block_no_dedup(&mut sink, &payload, 0, &mut pos, &lz4, None)
            .expect("basic LZ4 write failed");

        // Cursor advanced past the header.
        assert!(pos > 512);

        // Metadata describes the freshly written block.
        assert_eq!(info.offset, 512);
        assert!(info.length > 0);
        assert_eq!(info.logical_len, 4096);
        assert_ne!(info.checksum, 0);

        // Everything between 512 and `pos` was actually emitted.
        assert_eq!(sink.into_inner().len() as u64, pos - 512);
    }

    #[test]
    fn test_write_block_basic_zstd() {
        let mut sink = Cursor::new(Vec::new());
        let mut pos = 512u64;
        let payload = vec![0xAAu8; 4096];
        let zstd = ZstdCompressor::new(3, None);

        let info = write_block_no_dedup(&mut sink, &payload, 0, &mut pos, &zstd, None)
            .expect("basic Zstd write failed");

        assert_eq!(info.offset, 512);
        assert!(info.length > 0);
        assert_eq!(info.logical_len, 4096);
    }

    #[test]
    fn test_write_block_incompressible_data() {
        let mut sink = Cursor::new(Vec::new());
        let mut pos = 512u64;
        // Pseudo-random bytes that LZ4 cannot shrink much (may even expand).
        let payload: Vec<u8> = (0..4096).map(|i| ((i * 7 + 13) % 256) as u8).collect();
        let lz4 = Lz4Compressor::new();

        let info = write_block_no_dedup(&mut sink, &payload, 0, &mut pos, &lz4, None)
            .expect("incompressible write failed");

        // We only require that the write completed with sane metadata.
        assert_eq!(info.logical_len, payload.len() as u32);
        assert!(info.length > 0);
    }

    #[test]
    fn test_write_block_with_dedup_unique_blocks() {
        let mut sink = Cursor::new(Vec::new());
        let mut pos = 512u64;
        let mut seen = HashMap::new();
        let lz4 = Lz4Compressor::new();

        let first = vec![0xAAu8; 4096];
        let info1 =
            write_block_simple(&mut sink, &first, 0, &mut pos, Some(&mut seen), &lz4, None)
                .unwrap();
        let pos_after_first = pos;

        let second = vec![0xBBu8; 4096];
        let info2 =
            write_block_simple(&mut sink, &second, 1, &mut pos, Some(&mut seen), &lz4, None)
                .unwrap();

        // Distinct content means both blocks hit the file.
        assert_eq!(info1.offset, 512);
        assert_eq!(info2.offset, pos_after_first);
        assert!(pos > pos_after_first);

        // Each unique block registers one dedup entry.
        assert_eq!(seen.len(), 2);
    }

    #[test]
    fn test_write_block_with_dedup_duplicate_blocks() {
        let mut sink = Cursor::new(Vec::new());
        let mut pos = 512u64;
        let mut seen = HashMap::new();
        let lz4 = Lz4Compressor::new();

        let original = vec![0xAAu8; 4096];
        let info1 =
            write_block_simple(&mut sink, &original, 0, &mut pos, Some(&mut seen), &lz4, None)
                .unwrap();
        let pos_after_first = pos;

        // Identical content submitted under a different block index.
        let duplicate = vec![0xAAu8; 4096];
        let info2 =
            write_block_simple(&mut sink, &duplicate, 1, &mut pos, Some(&mut seen), &lz4, None)
                .unwrap();

        // The duplicate resolves to the first block's physical location.
        assert_eq!(info1.offset, info2.offset);
        assert_eq!(info1.length, info2.length);
        assert_eq!(info1.checksum, info2.checksum);

        // Nothing new was written, and the map still holds a single entry.
        assert_eq!(pos, pos_after_first);
        assert_eq!(seen.len(), 1);
    }

    #[test]
    fn test_write_block_with_encryption() {
        let mut sink = Cursor::new(Vec::new());
        let mut pos = 512u64;
        let payload = vec![0xAAu8; 4096];
        let lz4 = Lz4Compressor::new();
        let salt = [0u8; 32];
        let enc = AesGcmEncryptor::new(b"test_password", &salt, 100000).unwrap();

        let info = write_block_no_dedup(&mut sink, &payload, 0, &mut pos, &lz4, Some(&enc))
            .expect("encrypted write failed");

        // GCM appends an authentication tag, so at least 16 bytes were written.
        assert!(info.length > 16);
        assert_eq!(info.logical_len, 4096);
    }

    #[test]
    fn test_write_block_encryption_disables_dedup() {
        let mut sink = Cursor::new(Vec::new());
        let mut pos = 512u64;
        let mut seen = HashMap::new();
        let lz4 = Lz4Compressor::new();
        let salt = [0u8; 32];
        let enc = AesGcmEncryptor::new(b"test_password", &salt, 100000).unwrap();

        let first = vec![0xAAu8; 4096];
        let info1 = write_block_simple(
            &mut sink,
            &first,
            0,
            &mut pos,
            Some(&mut seen),
            &lz4,
            Some(&enc),
        )
        .unwrap();
        let pos_after_first = pos;

        // Same plaintext, but block index 1 yields a different nonce/ciphertext.
        let second = vec![0xAAu8; 4096];
        let info2 = write_block_simple(
            &mut sink,
            &second,
            1,
            &mut pos,
            Some(&mut seen),
            &lz4,
            Some(&enc),
        )
        .unwrap();

        // Both ciphertext blocks land in the file — no dedup under encryption.
        assert_eq!(info1.offset, 512);
        assert_eq!(info2.offset, pos_after_first);
        assert!(pos > pos_after_first);

        // Encryption suppresses dedup tracking entirely.
        assert!(seen.is_empty());
    }

    #[test]
    fn test_write_block_multiple_sequential() {
        let mut sink = Cursor::new(Vec::new());
        let mut pos = 512u64;
        let lz4 = Lz4Compressor::new();
        let mut predicted = 512u64;

        // Ten back-to-back blocks must be laid out contiguously.
        for idx in 0u64..10 {
            let payload = vec![idx as u8; 4096];
            let info =
                write_block_no_dedup(&mut sink, &payload, idx, &mut pos, &lz4, None).unwrap();
            assert_eq!(info.offset, predicted);
            predicted += info.length as u64;
        }

        assert_eq!(pos, predicted);
    }

    #[test]
    fn test_write_block_preserves_logical_length() {
        let mut sink = Cursor::new(Vec::new());
        let mut pos = 512u64;
        let lz4 = Lz4Compressor::new();

        for size in [128usize, 1024, 4096, 65536] {
            let payload = vec![0xAAu8; size];
            let info =
                write_block_no_dedup(&mut sink, &payload, 0, &mut pos, &lz4, None).unwrap();
            assert_eq!(info.logical_len, size as u32);
        }
    }

    #[test]
    fn test_write_block_checksum_differs() {
        let lz4 = Lz4Compressor::new();

        let mut sink_a = Cursor::new(Vec::new());
        let mut pos_a = 512u64;
        let info_a =
            write_block_no_dedup(&mut sink_a, &vec![0xAAu8; 4096], 0, &mut pos_a, &lz4, None)
                .unwrap();

        let mut sink_b = Cursor::new(Vec::new());
        let mut pos_b = 512u64;
        let info_b =
            write_block_no_dedup(&mut sink_b, &vec![0xBBu8; 4096], 0, &mut pos_b, &lz4, None)
                .unwrap();

        // Distinct payloads must yield distinct checksums.
        assert_ne!(info_a.checksum, info_b.checksum);
    }

    #[test]
    fn test_write_block_empty_chunk() {
        let mut sink = Cursor::new(Vec::new());
        let mut pos = 512u64;
        let lz4 = Lz4Compressor::new();

        let info = write_block_no_dedup(&mut sink, &[], 0, &mut pos, &lz4, None)
            .expect("empty chunk should be accepted");
        assert_eq!(info.logical_len, 0);
    }

    #[test]
    fn test_write_block_large_block() {
        let mut sink = Cursor::new(Vec::new());
        let mut pos = 512u64;
        let payload = vec![0xAAu8; 1024 * 1024]; // 1 MB of a single repeated byte
        let lz4 = Lz4Compressor::new();

        let info = write_block_no_dedup(&mut sink, &payload, 0, &mut pos, &lz4, None)
            .expect("1 MB write failed");

        assert_eq!(info.logical_len, 1024 * 1024);
        // A constant-byte megabyte should shrink under LZ4.
        assert!(info.length < info.logical_len);
    }

    #[test]
    fn test_integration_zero_detection_and_write() {
        let mut sink = Cursor::new(Vec::new());
        let mut pos = 512u64;
        let lz4 = Lz4Compressor::new();

        // Mirrors the packing-loop pattern: zero chunks skip the write path.
        let mut pack = |chunk: &[u8], idx: u64| {
            if is_zero_chunk(chunk) {
                create_zero_block(chunk.len() as u32)
            } else {
                write_block_no_dedup(&mut sink, chunk, idx, &mut pos, &lz4, None).unwrap()
            }
        };

        let zero_info = pack(&[0u8; 4096], 0);
        let data_info = pack(&[0xAAu8; 4096], 1);

        // The zero chunk bypassed the file entirely.
        assert_eq!(zero_info.offset, 0);
        assert_eq!(zero_info.length, 0);

        // The data chunk was physically written at the first free offset.
        assert_eq!(data_info.offset, 512);
        assert!(data_info.length > 0);
    }
}
1349}