// self_encryption/src/lib.rs
// Copyright 2021 MaidSafe.net limited.
//
// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3.
// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed
// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. Please review the Licences for the specific language governing
// permissions and limitations relating to use of the SAFE Network Software.

//! A file **content** self_encryptor.
//!
//! This library provides convergent encryption on file-based data and produces a `DataMap` type and
//! several chunks of encrypted data. Each chunk is up to ~4MiB in size (see `MAX_CHUNK_SIZE`) and
//! has an index and a name. This name is the BLAKE3 hash of the content, which allows the chunks to
//! be self-validating. If size and hash checks are utilised, a high degree of certainty in the
//! validity of the data can be expected.
//!
//! [Project GitHub page](https://github.com/maidsafe/self_encryption).
//!
//! # Examples
//!
//! A working implementation can be found
//! in the "examples" folder of this project.
//!
//! ```
//! use self_encryption::{encrypt, test_helpers::random_bytes};
//!
//! #[tokio::main]
//! async fn main() {
//!     let file_size = 10_000_000;
//!     let bytes = random_bytes(file_size);
//!
//!     if let Ok((_data_map, _encrypted_chunks)) = encrypt(bytes) {
//!         // .. then persist the `encrypted_chunks`.
//!         // Remember to keep `data_map` somewhere safe..!
//!     }
//! }
//! ```
//!
//! Storage of the `Vec<EncryptedChunk>` or `DataMap` is outside the scope of this
//! library and must be implemented by the user.
41#![doc(
42    html_logo_url = "https://raw.githubusercontent.com/maidsafe/QA/master/Images/maidsafe_logo.png",
43    html_favicon_url = "https://maidsafe.net/img/favicon.ico",
44    test(attr(forbid(warnings)))
45)]
46// For explanation of lint checks, run `rustc -W help` or see
47// https://github.com/maidsafe/QA/blob/master/Documentation/Rust%20Lint%20Checks.md
48#![forbid(
49    arithmetic_overflow,
50    mutable_transmutes,
51    no_mangle_const_items,
52    unknown_crate_types
53)]
54#![deny(
55    bad_style,
56    deprecated,
57    improper_ctypes,
58    missing_docs,
59    non_shorthand_field_patterns,
60    overflowing_literals,
61    stable_features,
62    unconditional_recursion,
63    unknown_lints,
64    unsafe_code,
65    unused,
66    unused_allocation,
67    unused_attributes,
68    unused_comparisons,
69    unused_features,
70    unused_parens,
71    while_true
72)]
73#![cfg_attr(not(feature = "python"), deny(warnings))]
74#![warn(
75    trivial_casts,
76    trivial_numeric_casts,
77    unused_extern_crates,
78    unused_import_braces,
79    unused_results
80)]
81#![allow(
82    missing_copy_implementations,
83    missing_debug_implementations,
84    variant_size_differences,
85    non_camel_case_types
86)]
87// Doesn't allow casts on constants yet, remove when issue is fixed:
88// https://github.com/rust-lang-nursery/rust-clippy/issues/2267
89#![allow(clippy::cast_lossless, clippy::decimal_literal_representation)]
90
91mod chunk;
92mod cipher;
93mod data_map;
94mod decrypt;
95mod encrypt;
96mod error;
97/// BLAKE3 content hashing (replaces SHA3-256)
98pub mod hash;
99#[cfg(feature = "python")]
100mod python;
101mod stream_decrypt;
102mod stream_encrypt;
103pub mod test_helpers;
104mod utils;
105
106pub use chunk::EncryptedChunk;
107pub use decrypt::decrypt_chunk;
108use utils::*;
109pub use xor_name::XorName;
110
111pub use self::{
112    data_map::{ChunkInfo, DataMap},
113    error::{Error, Result},
114    stream_decrypt::{streaming_decrypt, DecryptionStream},
115    stream_encrypt::{stream_encrypt, ChunkStream, EncryptionStream},
116};
117use bytes::Bytes;
118use std::{collections::HashMap, sync::LazyLock};
119
120// export these because they are used in our public API.
121pub use bytes;
122pub use xor_name;
123
/// Batch size for streaming decrypt chunk fetching.
///
/// Defaults to 10; can be overridden at process start via the
/// `STREAM_DECRYPT_BATCH_SIZE` environment variable. The value is read lazily
/// on first access and then cached for the lifetime of the process.
pub static STREAM_DECRYPT_BATCH_SIZE: LazyLock<usize> = LazyLock::new(|| {
    match std::env::var("STREAM_DECRYPT_BATCH_SIZE") {
        // A present-but-unparsable value falls back to the default.
        Ok(raw) => raw.parse().unwrap_or(10),
        Err(_) => 10,
    }
});
133
/// The minimum size (before compression) of an individual chunk of a file, defined as 1B.
pub const MIN_CHUNK_SIZE: usize = 1;

/// The minimum size (before compression) of data to be self-encrypted, defined as 3B.
pub const MIN_ENCRYPTABLE_BYTES: usize = 3 * MIN_CHUNK_SIZE;

/// The maximum size (before compression) of an individual chunk of a file, defaulting to ~4MiB.
/// Set to 4190208 (4MiB - 4KiB) to leave headroom for occasional compression growth.
///
/// Overridable at build time via the `MAX_CHUNK_SIZE` environment variable;
/// an unparsable override aborts compilation.
pub const MAX_CHUNK_SIZE: usize = match std::option_env!("MAX_CHUNK_SIZE") {
    None => 4_190_208,
    Some(raw) => match usize::from_str_radix(raw, 10) {
        Ok(parsed) => parsed,
        Err(_) => panic!("`MAX_CHUNK_SIZE` failed to parse as usize"),
    },
};

/// Controls the compression-speed vs compression-density tradeoffs.  The higher the quality, the
/// slower the compression.  Range is 0 to 11.
pub const COMPRESSION_QUALITY: i32 = 6;
152
/// Encrypts a set of bytes and returns the encrypted data together with
/// the data map that is derived from the input data.
///
/// Thin public wrapper over the internal routine, always using child level 0
/// (i.e. encrypting user content rather than a nested, serialized data map).
pub fn encrypt(bytes: Bytes) -> Result<(DataMap, Vec<EncryptedChunk>)> {
    encrypt_with_child_level(bytes, 0)
}
158
/// Internal encryption that accepts a child_level for KDF domain separation.
///
/// `child_level` is 0 for user content; `shrink_data_map` passes successively
/// higher levels when re-encrypting serialized data maps, keeping key material
/// for nested maps distinct from that of the original data.
fn encrypt_with_child_level(
    bytes: Bytes,
    child_level: usize,
) -> Result<(DataMap, Vec<EncryptedChunk>)> {
    let file_size = bytes.len();
    // Self-encryption needs enough input to form at least 3 chunks.
    if file_size < MIN_ENCRYPTABLE_BYTES {
        return Err(Error::Generic(format!(
            "Too small for self-encryption! Required size at least {MIN_ENCRYPTABLE_BYTES}"
        )));
    }

    let num_chunks = get_num_chunks(file_size);
    if num_chunks < 3 {
        return Err(Error::Generic(
            "File must be large enough to generate at least 3 chunks".to_string(),
        ));
    }

    let mut chunk_infos = Vec::with_capacity(num_chunks);
    let mut first_chunks = Vec::with_capacity(2);
    let mut src_hashes = Vec::with_capacity(num_chunks);
    let mut encrypted_chunks = Vec::with_capacity(num_chunks);

    // Process all chunks
    for chunk_index in 0..num_chunks {
        let (start, end) = get_start_end_positions(file_size, chunk_index);
        let chunk_data = bytes.slice(start..end);
        let src_hash = hash::content_hash(&chunk_data);
        src_hashes.push(src_hash);

        // Store first two chunks for later processing: their keys depend on
        // source hashes not yet computed at this point (presumably hashes of
        // neighbouring chunks per `get_pad_key_and_nonce` — TODO confirm).
        if chunk_index < 2 {
            first_chunks.push((chunk_index, chunk_data, src_hash, end - start));
            continue;
        }

        // For chunks 2 onwards, we can encrypt immediately since we have the previous two hashes
        let pki = get_pad_key_and_nonce(chunk_index, &src_hashes, child_level)?;
        let encrypted_content = encrypt::encrypt_chunk(chunk_data, pki)?;
        let dst_hash = hash::content_hash(&encrypted_content);

        encrypted_chunks.push(EncryptedChunk {
            content: encrypted_content,
        });

        chunk_infos.push(ChunkInfo {
            index: chunk_index,
            dst_hash,
            src_hash,
            src_size: end - start,
        });
    }

    // Now process the first two chunks using the complete set of source hashes
    for (chunk_index, chunk_data, src_hash, src_size) in first_chunks {
        let pki = get_pad_key_and_nonce(chunk_index, &src_hashes, child_level)?;
        let encrypted_content = encrypt::encrypt_chunk(chunk_data, pki)?;
        let dst_hash = hash::content_hash(&encrypted_content);

        // Inserting at positions 0 then 1 restores original chunk order, since
        // `first_chunks` was filled in ascending index order.
        encrypted_chunks.insert(
            chunk_index,
            EncryptedChunk {
                content: encrypted_content,
            },
        );

        chunk_infos.insert(
            chunk_index,
            ChunkInfo {
                index: chunk_index,
                dst_hash,
                src_hash,
                src_size,
            },
        );
    }

    let data_map = DataMap::new(chunk_infos);

    // Shrink the data map and store additional chunks if needed; chunks
    // produced while shrinking are appended to the same output vector.
    let (shrunk_data_map, _) = shrink_data_map(data_map, |_hash, content| {
        encrypted_chunks.push(EncryptedChunk { content });
        Ok(())
    })?;

    Ok((shrunk_data_map, encrypted_chunks))
}
247
248/// Decrypts a full set of chunks using the provided data map.
249///
250/// This function takes a data map and a slice of encrypted chunks and decrypts them to recover
251/// the original data. It handles both root data maps and child data maps.
252///
253/// # Arguments
254///
255/// * `data_map` - The data map containing chunk information
256/// * `chunks` - The encrypted chunks to decrypt
257///
258/// # Returns
259///
260/// * `Result<Bytes>` - The decrypted data or an error if chunks are missing/corrupted
261pub(crate) fn decrypt_full_set(data_map: &DataMap, chunks: &[EncryptedChunk]) -> Result<Bytes> {
262    let src_hashes = extract_hashes(data_map);
263    let child_level = data_map.child().unwrap_or(0);
264
265    // Create a mapping of chunk hashes to chunks for efficient lookup
266    let chunk_map: HashMap<XorName, &EncryptedChunk> = chunks
267        .iter()
268        .map(|chunk| (hash::content_hash(&chunk.content), chunk))
269        .collect();
270
271    // Get chunks in the order specified by the data map
272    let mut sorted_chunks = Vec::with_capacity(data_map.len());
273    for info in data_map.infos() {
274        let chunk = chunk_map.get(&info.dst_hash).ok_or_else(|| {
275            Error::Generic(format!(
276                "Chunk with hash {:?} not found in data map",
277                info.dst_hash
278            ))
279        })?;
280        sorted_chunks.push(*chunk);
281    }
282
283    decrypt::decrypt_sorted_set(src_hashes, &sorted_chunks, child_level)
284}
285
/// Decrypts a range of data from the encrypted chunks.
///
/// Only the chunks overlapping `[file_pos, file_pos + len)` are decrypted; the
/// requested window is then sliced out of the concatenated plaintext.
///
/// # Arguments
/// * `data_map` - The data map containing chunk information
/// * `chunks` - The encrypted chunks to decrypt
/// * `file_pos` - The position within the complete file to start reading from
/// * `len` - Number of bytes to read
///
/// # Returns
/// * `Result<Bytes>` - The decrypted range of data or an error if chunks are missing/corrupted
#[allow(dead_code)]
pub(crate) fn decrypt_range(
    data_map: &DataMap,
    chunks: &[EncryptedChunk],
    file_pos: usize,
    len: usize,
) -> Result<Bytes> {
    let src_hashes = extract_hashes(data_map);

    // Create a mapping of chunk hashes to chunks for efficient lookup
    let chunk_map: HashMap<XorName, &EncryptedChunk> = chunks
        .iter()
        .map(|chunk| (hash::content_hash(&chunk.content), chunk))
        .collect();

    // Get chunk size info
    let file_size = data_map.original_file_size();

    // Calculate which chunks we need based on the range
    let start_chunk = get_chunk_index(file_size, file_pos);
    let end_pos = std::cmp::min(file_pos + len, file_size);
    let end_chunk = get_chunk_index(file_size, end_pos);

    // Get chunks in the order specified by the data map
    // NOTE(review): the decryption loop below assumes `data_map.infos()`
    // yields entries sorted by `index` — confirm that invariant holds for all
    // data maps, otherwise `chunk_idx` may be paired with the wrong content.
    let mut sorted_chunks = Vec::new();
    for info in data_map.infos() {
        if info.index >= start_chunk && info.index <= end_chunk {
            let chunk = chunk_map.get(&info.dst_hash).ok_or_else(|| {
                Error::Generic(format!(
                    "Chunk with hash {:?} not found in data map",
                    info.dst_hash
                ))
            })?;
            sorted_chunks.push(*chunk);
        }
    }

    // Decrypt all required chunks
    let mut all_bytes = Vec::new();
    for (idx, chunk) in sorted_chunks.iter().enumerate() {
        // Absolute chunk index within the file (relies on the ordering
        // assumption noted above).
        let chunk_idx = start_chunk + idx;
        let decrypted = decrypt_chunk(
            chunk_idx,
            &chunk.content,
            &src_hashes,
            data_map.child().unwrap_or(0),
        )?;
        all_bytes.extend_from_slice(&decrypted);
    }

    let bytes = Bytes::from(all_bytes);

    // Calculate the actual offset within our decrypted data
    let chunk_start_pos = get_start_position(file_size, start_chunk);
    let internal_offset = file_pos - chunk_start_pos;

    // Requested start lies beyond what was decrypted (e.g. file_pos past EOF).
    if internal_offset >= bytes.len() {
        return Ok(Bytes::new());
    }

    // Extract just the range we need from the decrypted data
    let available_len = bytes.len() - internal_offset;
    let range_len = std::cmp::min(len, available_len);
    let range_bytes = bytes.slice(internal_offset..internal_offset + range_len);

    Ok(range_bytes)
}
363
364/// Shrinks a data map by recursively encrypting it until the number of chunks is small enough
365/// Returns the final data map and all chunks generated during shrinking
366pub fn shrink_data_map<F>(
367    mut data_map: DataMap,
368    mut store_chunk: F,
369) -> Result<(DataMap, Vec<EncryptedChunk>)>
370where
371    F: FnMut(XorName, Bytes) -> Result<()>,
372{
373    let mut all_chunks = Vec::new();
374
375    while data_map.len() > 3 {
376        let next_child_level = data_map.child().map_or(1, |c| c + 1);
377        let bytes = data_map
378            .to_bytes()
379            .map(Bytes::from)
380            .map_err(|e| Error::Generic(format!("Failed to serialize data map: {e}")))?;
381
382        let (mut new_data_map, encrypted_chunks) =
383            encrypt_with_child_level(bytes, next_child_level)?;
384
385        // Store and collect chunks
386        for chunk in &encrypted_chunks {
387            store_chunk(hash::content_hash(&chunk.content), chunk.content.clone())?;
388        }
389        all_chunks.extend(encrypted_chunks);
390
391        // Tag the DataMap with the child_level used during encryption
392        new_data_map = DataMap::with_child(new_data_map.infos().to_vec(), next_child_level);
393        data_map = new_data_map;
394    }
395    Ok((data_map, all_chunks))
396}
397
398/// Recursively gets the root data map by decrypting child data maps
399/// Takes a chunk retrieval function that handles fetching the encrypted chunks
400pub fn get_root_data_map<F>(data_map: DataMap, get_chunk: &mut F) -> Result<DataMap>
401where
402    F: FnMut(XorName) -> Result<Bytes>,
403{
404    // Create a cache of found chunks at the top level
405    let mut chunk_cache = HashMap::new();
406
407    fn inner_get_root_map<F>(
408        data_map: DataMap,
409        get_chunk: &mut F,
410        chunk_cache: &mut HashMap<XorName, Bytes>,
411        depth: usize,
412    ) -> Result<DataMap>
413    where
414        F: FnMut(XorName) -> Result<Bytes>,
415    {
416        if depth > 100 {
417            return Err(Error::Generic(
418                "Maximum data map recursion depth exceeded".to_string(),
419            ));
420        }
421
422        // If this is the root data map (no child level), return it
423        if !data_map.is_child() {
424            return Ok(data_map);
425        }
426
427        // Get all the chunks for this data map using the provided retrieval function
428        let mut encrypted_chunks = Vec::new();
429
430        for chunk_info in data_map.infos() {
431            let chunk_data = if let Some(cached) = chunk_cache.get(&chunk_info.dst_hash) {
432                cached.clone()
433            } else {
434                let data = get_chunk(chunk_info.dst_hash)?;
435                let _ = chunk_cache.insert(chunk_info.dst_hash, data.clone());
436                data
437            };
438            encrypted_chunks.push(EncryptedChunk {
439                content: chunk_data,
440            });
441        }
442
443        // Decrypt the chunks to get the parent data map bytes
444        let decrypted_bytes = decrypt_full_set(&data_map, &encrypted_chunks)?;
445
446        // Deserialize into a DataMap
447        let parent_data_map = DataMap::from_bytes(&decrypted_bytes)
448            .map_err(|e| Error::Generic(format!("Failed to deserialize data map: {e}")))?;
449
450        // Recursively get the root data map
451        inner_get_root_map(parent_data_map, get_chunk, chunk_cache, depth + 1)
452    }
453
454    // Start the recursive process with our cache
455    inner_get_root_map(data_map, get_chunk, &mut chunk_cache, 0)
456}
457
458/// Decrypts data using chunks retrieved from any storage backend via the provided retrieval function.
459pub fn decrypt(data_map: &DataMap, chunks: &[EncryptedChunk]) -> Result<Bytes> {
460    // Create a mapping of chunk hashes to chunks for efficient lookup
461    let chunk_map: HashMap<XorName, &EncryptedChunk> = chunks
462        .iter()
463        .map(|chunk| (hash::content_hash(&chunk.content), chunk))
464        .collect();
465
466    // Helper function to find chunks using our hash map
467    let mut get_chunk = |hash| {
468        chunk_map
469            .get(&hash)
470            .map(|chunk| chunk.content.clone())
471            .ok_or_else(|| Error::Generic(format!("Chunk not found for hash: {hash:?}")))
472    };
473
474    // Get the root map if we're dealing with a child map
475    let root_map = if data_map.is_child() {
476        get_root_data_map(data_map.clone(), &mut get_chunk)?
477    } else {
478        data_map.clone()
479    };
480
481    // Get only the chunks needed for the root map
482    let root_chunks: Vec<EncryptedChunk> = root_map
483        .infos()
484        .iter()
485        .map(|info| {
486            chunk_map
487                .get(&info.dst_hash)
488                .map(|chunk| EncryptedChunk {
489                    content: chunk.content.clone(),
490                })
491                .ok_or_else(|| {
492                    Error::Generic(format!("Missing chunk: {}", hex::encode(info.dst_hash)))
493                })
494        })
495        .collect::<Result<_>>()?;
496
497    decrypt_full_set(&root_map, &root_chunks)
498}
499
500/// Recursively gets the root data map by decrypting child data maps using parallel chunk retrieval.
501///
502/// This function works similarly to `get_root_data_map`, but it retrieves chunks in parallel,
503/// improving performance when dealing with large data maps or slow storage backends.
504///
505/// # Arguments
506///
507/// * `data_map` - The data map to retrieve the root from.
508/// * `get_chunk_parallel` - A function that retrieves chunks in parallel given a list of XorName hashes.
509///
510/// # Returns
511///
512/// * `Result<DataMap>` - The root data map or an error if retrieval or decryption fails.
513pub fn get_root_data_map_parallel<F>(data_map: DataMap, get_chunk_parallel: &F) -> Result<DataMap>
514where
515    F: Fn(&[(usize, XorName)]) -> Result<Vec<(usize, Bytes)>>,
516{
517    // Create a cache for chunks to avoid redundant retrievals
518    let mut chunk_cache = HashMap::new();
519
520    fn inner_get_root_map<F>(
521        data_map: DataMap,
522        get_chunk_parallel: &F,
523        chunk_cache: &mut HashMap<XorName, Bytes>,
524        depth: usize,
525    ) -> Result<DataMap>
526    where
527        F: Fn(&[(usize, XorName)]) -> Result<Vec<(usize, Bytes)>>,
528    {
529        if depth > 100 {
530            return Err(Error::Generic(
531                "Maximum data map recursion depth exceeded".to_string(),
532            ));
533        }
534
535        // If this is the root data map (no child level), return it
536        if !data_map.is_child() {
537            return Ok(data_map);
538        }
539
540        // Determine which chunks are missing from the cache
541        let missing_hashes: Vec<_> = data_map
542            .infos()
543            .iter()
544            .map(|info| (info.index, info.dst_hash))
545            .filter(|(_i, hash)| !chunk_cache.contains_key(hash))
546            .collect();
547
548        if !missing_hashes.is_empty() {
549            let new_chunks = get_chunk_parallel(&missing_hashes)?;
550            for ((_i, hash), (_j, chunk_data)) in missing_hashes.iter().zip(new_chunks.into_iter())
551            {
552                let _ = chunk_cache.insert(*hash, chunk_data);
553            }
554        }
555
556        let encrypted_chunks: Vec<EncryptedChunk> = data_map
557            .infos()
558            .iter()
559            .map(|info| {
560                let content = chunk_cache.get(&info.dst_hash).ok_or_else(|| {
561                    let dst_hash = info.dst_hash;
562                    Error::Generic(format!("Chunk not found for hash: {dst_hash:?}"))
563                })?;
564                Ok(EncryptedChunk {
565                    content: content.clone(),
566                })
567            })
568            .collect::<Result<_>>()?;
569
570        // Decrypt the chunks to get the parent data map bytes
571        let decrypted_bytes = decrypt_full_set(&data_map, &encrypted_chunks)?;
572        let parent_data_map = DataMap::from_bytes(&decrypted_bytes)
573            .map_err(|e| Error::Generic(format!("Failed to deserialize data map: {e}")))?;
574
575        // Recursively get the root data map
576        inner_get_root_map(parent_data_map, get_chunk_parallel, chunk_cache, depth + 1)
577    }
578
579    // Start the recursive process with our cache
580    inner_get_root_map(data_map, get_chunk_parallel, &mut chunk_cache, 0)
581}
582
/// Serializes a data structure using bincode.
///
/// # Arguments
///
/// * `data` - The data structure to serialize, must implement `serde::Serialize`
///
/// # Returns
///
/// * `Result<Vec<u8>>` - The serialized bytes or an error
///
/// # Errors
///
/// Any bincode failure is wrapped as `Error::Generic` with a
/// "Serialization error" message.
pub fn serialize<T: serde::Serialize>(data: &T) -> Result<Vec<u8>> {
    bincode::serialize(data).map_err(|e| Error::Generic(format!("Serialization error: {e}")))
}
595
/// Deserializes bytes into a data structure using bincode.
///
/// # Arguments
///
/// * `bytes` - The bytes to deserialize
///
/// # Returns
///
/// * `Result<T>` - The deserialized data structure or an error
///
/// # Errors
///
/// Any bincode failure is wrapped as `Error::Generic` with a
/// "Deserialization error" message.
pub fn deserialize<T: serde::de::DeserializeOwned>(bytes: &[u8]) -> Result<T> {
    bincode::deserialize(bytes).map_err(|e| Error::Generic(format!("Deserialization error: {e}")))
}
608
609/// Verifies and deserializes a chunk by checking its content hash matches the provided name.
610///
611/// # Arguments
612///
613/// * `name` - The expected XorName hash of the chunk content
614/// * `bytes` - The serialized chunk content to verify
615///
616/// # Returns
617///
618/// * `Result<EncryptedChunk>` - The deserialized chunk if verification succeeds
619/// * `Error` - If the content hash doesn't match or deserialization fails
620pub fn verify_chunk(name: XorName, bytes: &[u8]) -> Result<EncryptedChunk> {
621    // Create an EncryptedChunk from the bytes
622    let chunk = EncryptedChunk {
623        content: Bytes::from(bytes.to_vec()),
624    };
625
626    // Calculate the hash of the encrypted content directly
627    let calculated_hash = hash::content_hash(chunk.content.as_ref());
628
629    // Verify the hash matches
630    if calculated_hash != name {
631        return Err(Error::Generic(format!(
632            "Chunk content hash mismatch. Expected: {name:?}, Got: {calculated_hash:?}"
633        )));
634    }
635
636    Ok(chunk)
637}
638
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_helpers::random_bytes;
    use std::{
        io::Write,
        sync::{Arc, Mutex},
    };
    use tempfile::NamedTempFile;

    // Helper function to create a data map with specified number of chunks
    // NOTE(review): with `MIN_CHUNK_SIZE == 1` this generates only `num_chunks`
    // bytes, which `encrypt` rejects when below MIN_ENCRYPTABLE_BYTES — confirm
    // intended sizing before using this helper.
    #[allow(dead_code)]
    fn create_test_data_map(num_chunks: usize) -> Result<DataMap> {
        let bytes = random_bytes(num_chunks * MIN_CHUNK_SIZE);
        let (data_map, _) = encrypt(bytes)?;
        Ok(data_map)
    }

    // Builds a synthetic data map whose src/dst hashes are derived from the
    // chunk index; useful for tests that never decrypt the referenced chunks.
    #[allow(dead_code)]
    fn create_dummy_data_map(num_chunks: usize) -> DataMap {
        let mut chunks = Vec::with_capacity(num_chunks);
        for i in 0..num_chunks {
            chunks.push(ChunkInfo {
                index: i,
                dst_hash: hash::content_hash(&[i as u8]),
                src_hash: hash::content_hash(&[i as u8]),
                src_size: MIN_CHUNK_SIZE,
            });
        }
        DataMap::new(chunks)
    }

    // Verifies that encrypting a large file yields a data map already shrunk
    // to at most 3 chunk identifiers.
    #[test]
    fn test_multiple_levels_of_shrinking() -> Result<()> {
        // Create a temp file with random data
        // NOTE(review): the temp file is written but never read back — only the
        // in-memory `bytes` drive this test.
        let bytes = random_bytes(10_000_000);
        let mut temp_file = NamedTempFile::new()?;
        temp_file.write_all(&bytes)?;

        let storage = HashMap::new();
        let storage_clone = Arc::new(Mutex::new(storage));

        let store = move |hash: XorName, content: Bytes| -> Result<()> {
            let _ = storage_clone.lock().unwrap().insert(hash, content.to_vec());
            Ok(())
        };

        // Use standard encryption which supports shrinking
        let (data_map, encrypted_chunks) = encrypt(bytes)?;

        // Store the chunks
        for chunk in &encrypted_chunks {
            store(hash::content_hash(&chunk.content), chunk.content.clone())?;
        }
        // `encrypt` shrinks internally, so the returned map must be small.
        assert!(data_map.chunk_identifiers.len() <= 3);

        Ok(())
    }

    // End-to-end check: encrypt, shrink, then stream-decrypt a 4MB file and
    // compare the plaintext byte-for-byte.
    #[test]
    fn test_streaming_encrypt_4mb_file() -> Result<()> {
        // Create test data - exactly 4MB
        let file_size = 4 * 1024 * 1024;
        let bytes = random_bytes(file_size);

        // Create storage for encrypted chunks
        let storage = Arc::new(Mutex::new(HashMap::new()));
        let storage_clone = storage.clone();

        // Store function that also prints chunk info for debugging
        let store = move |hash: XorName, content: Bytes| -> Result<()> {
            println!(
                "Storing chunk: {} (size: {}) at index {}",
                hex::encode(hash),
                content.len(),
                storage_clone.lock().unwrap().len()
            );
            let _ = storage_clone.lock().unwrap().insert(hash, content.to_vec());
            Ok(())
        };

        // First encrypt the data directly to get ALL chunks
        let (data_map, initial_chunks) = encrypt(bytes.clone())?;

        println!("Initial data map has {} chunks", data_map.len());
        println!("Data map child level: {:?}", data_map.child());

        // Start with all initial chunks
        let mut all_chunks = Vec::new();
        all_chunks.extend(initial_chunks);

        // Store all chunks
        for chunk in &all_chunks {
            let hash = hash::content_hash(&chunk.content);
            store(hash, chunk.content.clone())?;
        }

        // Now do a shrink operation (idempotent if `encrypt` already shrank)
        let mut store_memory = store.clone();
        let (shrunk_map, shrink_chunks) = shrink_data_map(data_map.clone(), &mut store_memory)?;
        println!("Got {} new chunks from shrinking", shrink_chunks.len());

        // Add shrink chunks to our collection
        all_chunks.extend(shrink_chunks);

        println!("\nFinal Data Map Info:");
        println!("Number of chunks: {}", shrunk_map.len());
        println!("Original file size: {file_size}");
        println!("Is child: {}", shrunk_map.is_child());

        for (i, info) in shrunk_map.infos().iter().enumerate() {
            println!(
                "Chunk {}: index={}, src_size={}, src_hash={}, dst_hash={}",
                i,
                info.index,
                info.src_size,
                hex::encode(info.src_hash),
                hex::encode(info.dst_hash)
            );
        }

        // Print all stored chunks
        // NOTE(review): this guard is held for the remainder of the test; fine
        // here since `store` is not called again after this point.
        println!("\nStored Chunks:");
        let stored = storage.lock().unwrap();
        for (hash, content) in stored.iter() {
            println!("Hash: {} (size: {})", hex::encode(hash), content.len());
        }

        // Create chunk retrieval function for streaming_decrypt
        let stored_clone = stored.clone();
        let get_chunk_parallel = |hashes: &[(usize, XorName)]| -> Result<Vec<(usize, Bytes)>> {
            hashes
                .iter()
                .map(|(i, hash)| {
                    stored_clone
                        .get(hash)
                        .map(|data| (*i, Bytes::from(data.clone())))
                        .ok_or_else(|| {
                            Error::Generic(format!("Missing chunk: {}", hex::encode(hash)))
                        })
                })
                .collect()
        };

        // Decrypt using streaming_decrypt
        let decrypt_stream = streaming_decrypt(&shrunk_map, &get_chunk_parallel)?;
        let decrypted = decrypt_stream.range_full()?;

        // The round-trip must reproduce the original plaintext exactly.
        assert_eq!(decrypted.len(), file_size);
        assert_eq!(&decrypted[..], &bytes[..]);

        Ok(())
    }
}