self_encryption/lib.rs

// Copyright 2021 MaidSafe.net limited.
//
// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3.
// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed
// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. Please review the Licences for the specific language governing
// permissions and limitations relating to use of the SAFE Network Software.

//! A file **content** self_encryptor.
//!
//! This library provides convergent encryption on file-based data and produces a `DataMap` type
//! and several chunks of encrypted data. Each chunk is at most `MAX_CHUNK_SIZE` bytes in size
//! (4MiB by default) and has an index and a name. This name is the SHA3-256 hash of the content,
//! which allows the chunks to be self-validating. If size and hash checks are utilised, a high
//! degree of certainty in the validity of the data can be expected.
//!
//! [Project GitHub page](https://github.com/maidsafe/self_encryption).
//!
//! # Examples
//!
//! A working implementation can be found in the "examples" folder of this project.
//!
//! ```
//! use self_encryption::{encrypt, test_helpers::random_bytes};
//!
//! fn main() {
//!     let file_size = 10_000_000;
//!     let bytes = random_bytes(file_size);
//!
//!     if let Ok((_data_map, _encrypted_chunks)) = encrypt(bytes) {
//!         // .. then persist the `encrypted_chunks`.
//!         // Remember to keep the `DataMap` somewhere safe..!
//!     }
//! }
//! ```
//!
//! Storage of the `Vec<EncryptedChunk>` or `DataMap` is outside the scope of this
//! library and must be implemented by the user.

#![doc(
    html_logo_url = "https://raw.githubusercontent.com/maidsafe/QA/master/Images/maidsafe_logo.png",
    html_favicon_url = "https://maidsafe.net/img/favicon.ico",
    test(attr(forbid(warnings)))
)]
// For explanation of lint checks, run `rustc -W help` or see
// https://github.com/maidsafe/QA/blob/master/Documentation/Rust%20Lint%20Checks.md
#![forbid(
    arithmetic_overflow,
    mutable_transmutes,
    no_mangle_const_items,
    unknown_crate_types
)]
#![deny(
    bad_style,
    deprecated,
    improper_ctypes,
    missing_docs,
    non_shorthand_field_patterns,
    overflowing_literals,
    stable_features,
    unconditional_recursion,
    unknown_lints,
    unsafe_code,
    unused,
    unused_allocation,
    unused_attributes,
    unused_comparisons,
    unused_features,
    unused_parens,
    while_true
)]
#![cfg_attr(not(feature = "python"), deny(warnings))]
#![warn(
    trivial_casts,
    trivial_numeric_casts,
    unused_extern_crates,
    unused_import_braces,
    unused_results
)]
#![allow(
    missing_copy_implementations,
    missing_debug_implementations,
    variant_size_differences,
    non_camel_case_types
)]
// Doesn't allow casts on constants yet, remove when issue is fixed:
// https://github.com/rust-lang-nursery/rust-clippy/issues/2267
#![allow(clippy::cast_lossless, clippy::decimal_literal_representation)]

mod aes;
mod chunk;
mod data_map;
mod decrypt;
mod encrypt;
mod error;
#[cfg(feature = "python")]
mod python;
mod stream_decrypt;
mod stream_encrypt;
mod stream_file;
pub mod test_helpers;
mod utils;

pub use chunk::EncryptedChunk;
pub use decrypt::decrypt_chunk;
use utils::*;
pub use xor_name::XorName;

pub use self::{
    data_map::{ChunkInfo, DataMap},
    error::{Error, Result},
    stream_decrypt::{streaming_decrypt, DecryptionStream},
    stream_encrypt::{stream_encrypt, ChunkStream, EncryptionStream},
    stream_file::{streaming_decrypt_from_storage, streaming_encrypt_from_file},
};
use bytes::Bytes;
use std::{
    fs::File,
    io::{Read, Write},
    path::Path,
};

// export these because they are used in our public API.
pub use bytes;
pub use xor_name;

/// The minimum size (before compression) of data to be self-encrypted, defined as 3 bytes.
pub const MIN_ENCRYPTABLE_BYTES: usize = 3 * MIN_CHUNK_SIZE;

/// The maximum size (before compression) of an individual chunk of a file, defaulting to 4MiB.
/// Can be overridden at build time via the `MAX_CHUNK_SIZE` environment variable.
pub const MAX_CHUNK_SIZE: usize = match std::option_env!("MAX_CHUNK_SIZE") {
    Some(v) => match usize::from_str_radix(v, 10) {
        Ok(v) => v,
        Err(_err) => panic!("`MAX_CHUNK_SIZE` failed to parse as usize"),
    },
    // Default to 4MiB
    None => 4 * 1024 * 1024,
};

/// The minimum size (before compression) of an individual chunk of a file, defined as 1 byte.
pub const MIN_CHUNK_SIZE: usize = 1;
/// Controls the compression-speed vs compression-density tradeoff. The higher the quality, the
/// slower the compression. Range is 0 to 11.
pub const COMPRESSION_QUALITY: i32 = 6;

/// Reads a file from disk, encrypts it, and writes the encrypted chunks to the given output directory.
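///
/// # Example
///
/// A minimal sketch; `input.bin` and the `chunks/` directory are hypothetical paths that must
/// exist before the call:
///
/// ```no_run
/// use self_encryption::encrypt_from_file;
/// use std::path::Path;
///
/// fn main() -> self_encryption::Result<()> {
///     let (_data_map, chunk_names) =
///         encrypt_from_file(Path::new("input.bin"), Path::new("chunks"))?;
///     println!("stored {} chunks", chunk_names.len());
///     Ok(())
/// }
/// ```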
pub fn encrypt_from_file(file_path: &Path, output_dir: &Path) -> Result<(DataMap, Vec<XorName>)> {
    let mut file = File::open(file_path)?;
    let mut bytes = Vec::new();
    let _ = file.read_to_end(&mut bytes)?;
    let bytes = Bytes::from(bytes);

    // First encrypt the data to get all chunks
    let (data_map, encrypted_chunks) = encrypt(bytes)?;

    // Track all chunk names
    let mut chunk_names = Vec::new();

    // Store all chunks to disk
    for chunk in encrypted_chunks {
        let chunk_name = XorName::from_content(&chunk.content);
        chunk_names.push(chunk_name);

        let file_path = output_dir.join(hex::encode(chunk_name));
        let mut output_file = File::create(file_path)?;
        output_file.write_all(&chunk.content)?;
    }

    Ok((data_map, chunk_names))
}

/// Encrypts a set of bytes and returns the encrypted data together with
/// the data map that is derived from the input data.
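///
/// # Example
///
/// A minimal sketch using the crate's test helpers to generate input:
///
/// ```
/// use self_encryption::{encrypt, test_helpers::random_bytes};
///
/// let (_data_map, encrypted_chunks) = encrypt(random_bytes(1_000_000)).unwrap();
/// assert!(encrypted_chunks.len() >= 3);
/// ```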
pub fn encrypt(bytes: Bytes) -> Result<(DataMap, Vec<EncryptedChunk>)> {
    let file_size = bytes.len();
    if file_size < MIN_ENCRYPTABLE_BYTES {
        return Err(Error::Generic(format!(
            "Too small for self-encryption! Required size is at least {MIN_ENCRYPTABLE_BYTES} bytes"
        )));
    }

    let num_chunks = get_num_chunks(file_size);
    if num_chunks < 3 {
        return Err(Error::Generic(
            "File must be large enough to generate at least 3 chunks".to_string(),
        ));
    }

    let mut chunk_infos = Vec::with_capacity(num_chunks);
    let mut first_chunks = Vec::with_capacity(2);
    let mut src_hashes = Vec::with_capacity(num_chunks);
    let mut encrypted_chunks = Vec::with_capacity(num_chunks);

    // Process all chunks
    for chunk_index in 0..num_chunks {
        let (start, end) = get_start_end_positions(file_size, chunk_index);
        let chunk_data = bytes.slice(start..end);
        let src_hash = XorName::from_content(&chunk_data);
        src_hashes.push(src_hash);

        // Store first two chunks for later processing
        if chunk_index < 2 {
            first_chunks.push((chunk_index, chunk_data, src_hash, end - start));
            continue;
        }

        // For chunks 2 onwards, we can encrypt immediately since we have the previous two hashes
        let pki = get_pad_key_and_iv(chunk_index, &src_hashes);
        let encrypted_content = encrypt::encrypt_chunk(chunk_data, pki)?;
        let dst_hash = XorName::from_content(&encrypted_content);

        encrypted_chunks.push(EncryptedChunk {
            content: encrypted_content,
        });

        chunk_infos.push(ChunkInfo {
            index: chunk_index,
            dst_hash,
            src_hash,
            src_size: end - start,
        });
    }

    // Now process the first two chunks using the complete set of source hashes
    for (chunk_index, chunk_data, src_hash, src_size) in first_chunks {
        let pki = get_pad_key_and_iv(chunk_index, &src_hashes);
        let encrypted_content = encrypt::encrypt_chunk(chunk_data, pki)?;
        let dst_hash = XorName::from_content(&encrypted_content);

        encrypted_chunks.insert(
            chunk_index,
            EncryptedChunk {
                content: encrypted_content,
            },
        );

        chunk_infos.insert(
            chunk_index,
            ChunkInfo {
                index: chunk_index,
                dst_hash,
                src_hash,
                src_size,
            },
        );
    }

    let data_map = DataMap::new(chunk_infos);

    // Shrink the data map and store additional chunks if needed
    let (shrunk_data_map, _) = shrink_data_map(data_map, |_hash, content| {
        encrypted_chunks.push(EncryptedChunk { content });
        Ok(())
    })?;

    Ok((shrunk_data_map, encrypted_chunks))
}

/// Decrypts a full set of chunks using the provided data map.
///
/// This function takes a data map and a slice of encrypted chunks and decrypts them to recover
/// the original data. It handles both root data maps and child data maps.
///
/// # Arguments
///
/// * `data_map` - The data map containing chunk information
/// * `chunks` - The encrypted chunks to decrypt
///
/// # Returns
///
/// * `Result<Bytes>` - The decrypted data or an error if chunks are missing/corrupted
pub(crate) fn decrypt_full_set(data_map: &DataMap, chunks: &[EncryptedChunk]) -> Result<Bytes> {
    let src_hashes = extract_hashes(data_map);

    // Create a mapping of chunk hashes to chunks for efficient lookup
    let chunk_map: std::collections::HashMap<XorName, &EncryptedChunk> = chunks
        .iter()
        .map(|chunk| (XorName::from_content(&chunk.content), chunk))
        .collect();

    // Get chunks in the order specified by the data map
    let mut sorted_chunks = Vec::with_capacity(data_map.len());
    for info in data_map.infos() {
        let chunk = chunk_map.get(&info.dst_hash).ok_or_else(|| {
            Error::Generic(format!(
                "Chunk with hash {:?} not found in data map",
                info.dst_hash
            ))
        })?;
        sorted_chunks.push(*chunk);
    }

    decrypt::decrypt_sorted_set(src_hashes, &sorted_chunks)
}

/// Decrypts a range of data from the encrypted chunks.
///
/// # Arguments
/// * `data_map` - The data map containing chunk information
/// * `chunks` - The encrypted chunks to decrypt
/// * `file_pos` - The position within the complete file to start reading from
/// * `len` - Number of bytes to read
///
/// # Returns
/// * `Result<Bytes>` - The decrypted range of data or an error if chunks are missing/corrupted
#[allow(dead_code)]
pub(crate) fn decrypt_range(
    data_map: &DataMap,
    chunks: &[EncryptedChunk],
    file_pos: usize,
    len: usize,
) -> Result<Bytes> {
    let src_hashes = extract_hashes(data_map);

    // Create a mapping of chunk hashes to chunks for efficient lookup
    let chunk_map: std::collections::HashMap<XorName, &EncryptedChunk> = chunks
        .iter()
        .map(|chunk| (XorName::from_content(&chunk.content), chunk))
        .collect();

    // Get chunk size info
    let file_size = data_map.original_file_size();

    // Calculate which chunks we need based on the range
    let start_chunk = get_chunk_index(file_size, file_pos);
    let end_pos = std::cmp::min(file_pos + len, file_size);
    let end_chunk = get_chunk_index(file_size, end_pos);

    // Get chunks in the order specified by the data map
    let mut sorted_chunks = Vec::new();
    for info in data_map.infos() {
        if info.index >= start_chunk && info.index <= end_chunk {
            let chunk = chunk_map.get(&info.dst_hash).ok_or_else(|| {
                Error::Generic(format!(
                    "Chunk with hash {:?} not found in data map",
                    info.dst_hash
                ))
            })?;
            sorted_chunks.push(*chunk);
        }
    }

    // Decrypt all required chunks
    let mut all_bytes = Vec::new();
    for (idx, chunk) in sorted_chunks.iter().enumerate() {
        let chunk_idx = start_chunk + idx;
        let decrypted = decrypt_chunk(chunk_idx, &chunk.content, &src_hashes)?;
        all_bytes.extend_from_slice(&decrypted);
    }

    let bytes = Bytes::from(all_bytes);

    // Calculate the actual offset within our decrypted data
    let chunk_start_pos = get_start_position(file_size, start_chunk);
    let internal_offset = file_pos - chunk_start_pos;

    if internal_offset >= bytes.len() {
        return Ok(Bytes::new());
    }

    // Extract just the range we need from the decrypted data
    let available_len = bytes.len() - internal_offset;
    let range_len = std::cmp::min(len, available_len);
    let range_bytes = bytes.slice(internal_offset..internal_offset + range_len);

    Ok(range_bytes)
}

/// Shrinks a data map by recursively encrypting it until the number of chunks is small enough.
/// Returns the final data map and all chunks generated during shrinking.
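///
/// # Example
///
/// A minimal sketch that collects the generated chunks through the `store_chunk` callback:
///
/// ```
/// use self_encryption::{encrypt, shrink_data_map, test_helpers::random_bytes};
///
/// let (data_map, _chunks) = encrypt(random_bytes(1_000_000)).unwrap();
/// let mut stored = Vec::new();
/// let (shrunk, _new_chunks) = shrink_data_map(data_map, |hash, content| {
///     stored.push((hash, content));
///     Ok(())
/// })
/// .unwrap();
/// assert!(shrunk.len() <= 3);
/// ```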
pub fn shrink_data_map<F>(
    mut data_map: DataMap,
    mut store_chunk: F,
) -> Result<(DataMap, Vec<EncryptedChunk>)>
where
    F: FnMut(XorName, Bytes) -> Result<()>,
{
    let mut all_chunks = Vec::new();

    while data_map.len() > 3 {
        let child_level = data_map.child().unwrap_or(0);
        let bytes = test_helpers::serialise(&data_map)
            .map(Bytes::from)
            .map_err(|_| Error::Generic("Failed to serialize data map".to_string()))?;

        let (mut new_data_map, encrypted_chunks) = encrypt(bytes)?;

        // Store and collect chunks
        for chunk in &encrypted_chunks {
            store_chunk(XorName::from_content(&chunk.content), chunk.content.clone())?;
        }
        all_chunks.extend(encrypted_chunks);

        // Update data map for next iteration
        new_data_map = DataMap::with_child(new_data_map.infos(), child_level + 1);
        data_map = new_data_map;
    }
    Ok((data_map, all_chunks))
}

/// Recursively gets the root data map by decrypting child data maps.
/// Takes a chunk retrieval function that handles fetching the encrypted chunks.
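///
/// # Example
///
/// A minimal sketch, assuming an in-memory `HashMap` as the chunk store; a real caller would
/// fetch chunks from its storage backend:
///
/// ```no_run
/// use self_encryption::{bytes::Bytes, get_root_data_map, DataMap, Error, Result, XorName};
/// use std::collections::HashMap;
///
/// fn main() -> Result<()> {
///     let store: HashMap<XorName, Bytes> = HashMap::new();
///     let mut get_chunk = |hash: XorName| -> Result<Bytes> {
///         store
///             .get(&hash)
///             .cloned()
///             .ok_or_else(|| Error::Generic(format!("missing chunk: {hash:?}")))
///     };
///
///     // A data map normally comes from `encrypt`; an empty one stands in here.
///     let root = get_root_data_map(DataMap::new(Vec::new()), &mut get_chunk)?;
///     assert!(!root.is_child());
///     Ok(())
/// }
/// ```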
pub fn get_root_data_map<F>(data_map: DataMap, get_chunk: &mut F) -> Result<DataMap>
where
    F: FnMut(XorName) -> Result<Bytes>,
{
    // Create a cache of found chunks at the top level
    let mut chunk_cache = std::collections::HashMap::new();

    fn inner_get_root_map<F>(
        data_map: DataMap,
        get_chunk: &mut F,
        chunk_cache: &mut std::collections::HashMap<XorName, Bytes>,
    ) -> Result<DataMap>
    where
        F: FnMut(XorName) -> Result<Bytes>,
    {
        // If this is the root data map (no child level), return it
        if !data_map.is_child() {
            return Ok(data_map);
        }

        // Get all the chunks for this data map using the provided retrieval function
        let mut encrypted_chunks = Vec::new();

        for chunk_info in data_map.infos() {
            let chunk_data = if let Some(cached) = chunk_cache.get(&chunk_info.dst_hash) {
                cached.clone()
            } else {
                let data = get_chunk(chunk_info.dst_hash)?;
                let _ = chunk_cache.insert(chunk_info.dst_hash, data.clone());
                data
            };
            encrypted_chunks.push(EncryptedChunk {
                content: chunk_data,
            });
        }

        // Decrypt the chunks to get the parent data map bytes
        let decrypted_bytes = decrypt_full_set(&data_map, &encrypted_chunks)?;

        // Deserialize into a DataMap
        let parent_data_map = test_helpers::deserialise(&decrypted_bytes)
            .map_err(|_| Error::Generic("Failed to deserialize data map".to_string()))?;

        // Recursively get the root data map
        inner_get_root_map(parent_data_map, get_chunk, chunk_cache)
    }

    // Start the recursive process with our cache
    inner_get_root_map(data_map, get_chunk, &mut chunk_cache)
}

/// Decrypts data using chunks retrieved from any storage backend via the provided retrieval function.
/// Writes the decrypted output to the specified file path.
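///
/// # Example
///
/// A minimal sketch, assuming chunks were previously written to disk under their hex-encoded
/// hashes; `chunks/`, `data_map.bin`, and `restored.bin` are hypothetical paths:
///
/// ```no_run
/// use self_encryption::{bytes::Bytes, decrypt_from_storage, deserialize, DataMap, Result, XorName};
/// use std::path::Path;
///
/// fn main() -> Result<()> {
///     let chunk_dir = Path::new("chunks");
///     // Hypothetical: the data map was serialized to disk earlier with `serialize`.
///     let data_map: DataMap = deserialize(&std::fs::read("data_map.bin")?)?;
///     decrypt_from_storage(&data_map, Path::new("restored.bin"), |hash: XorName| -> Result<Bytes> {
///         let bytes = std::fs::read(chunk_dir.join(hex::encode(hash)))?;
///         Ok(Bytes::from(bytes))
///     })
/// }
/// ```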
pub fn decrypt_from_storage<F>(
    data_map: &DataMap,
    output_filepath: &Path,
    mut get_chunk: F,
) -> Result<()>
where
    F: FnMut(XorName) -> Result<Bytes>,
{
    let root_map = if data_map.is_child() {
        get_root_data_map(data_map.clone(), &mut get_chunk)?
    } else {
        data_map.clone()
    };
    let mut encrypted_chunks = Vec::new();
    for chunk_info in root_map.infos() {
        let chunk_data = get_chunk(chunk_info.dst_hash)?;
        encrypted_chunks.push(EncryptedChunk {
            content: chunk_data,
        });
    }

    let decrypted_content = decrypt_full_set(&root_map, &encrypted_chunks)?;
    File::create(output_filepath)
        .map_err(Error::from)?
        .write_all(&decrypted_content)
        .map_err(Error::from)?;

    Ok(())
}

/// Decrypts data from a data map and a set of encrypted chunks, resolving any child data maps
/// to the root data map first.
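///
/// # Example
///
/// A round-trip sketch using randomly generated input:
///
/// ```
/// use self_encryption::{decrypt, encrypt, test_helpers::random_bytes};
///
/// let bytes = random_bytes(1_000_000);
/// let (data_map, chunks) = encrypt(bytes.clone()).unwrap();
/// assert_eq!(decrypt(&data_map, &chunks).unwrap(), bytes);
/// ```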
pub fn decrypt(data_map: &DataMap, chunks: &[EncryptedChunk]) -> Result<Bytes> {
    // Create a mapping of chunk hashes to chunks for efficient lookup
    let chunk_map: std::collections::HashMap<XorName, &EncryptedChunk> = chunks
        .iter()
        .map(|chunk| (XorName::from_content(&chunk.content), chunk))
        .collect();

    // Helper function to find chunks using our hash map
    let mut get_chunk = |hash| {
        chunk_map
            .get(&hash)
            .map(|chunk| chunk.content.clone())
            .ok_or_else(|| Error::Generic(format!("Chunk not found for hash: {hash:?}")))
    };

    // Get the root map if we're dealing with a child map
    let root_map = if data_map.is_child() {
        get_root_data_map(data_map.clone(), &mut get_chunk)?
    } else {
        data_map.clone()
    };

    // Get only the chunks needed for the root map
    let root_chunks: Vec<EncryptedChunk> = root_map
        .infos()
        .iter()
        .map(|info| {
            chunk_map
                .get(&info.dst_hash)
                .map(|chunk| EncryptedChunk {
                    content: chunk.content.clone(),
                })
                .ok_or_else(|| {
                    Error::Generic(format!("Missing chunk: {}", hex::encode(info.dst_hash)))
                })
        })
        .collect::<Result<_>>()?;

    decrypt_full_set(&root_map, &root_chunks)
}

/// Recursively gets the root data map by decrypting child data maps using parallel chunk retrieval.
///
/// This function works similarly to `get_root_data_map`, but it retrieves chunks in parallel,
/// improving performance when dealing with large data maps or slow storage backends.
///
/// # Arguments
///
/// * `data_map` - The data map to retrieve the root from.
/// * `get_chunk_parallel` - A function that retrieves chunks in parallel given a list of XorName hashes.
///
/// # Returns
///
/// * `Result<DataMap>` - The root data map or an error if retrieval or decryption fails.
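///
/// # Example
///
/// A minimal sketch, assuming an in-memory `HashMap` stands in for the parallel storage
/// backend; a real implementation would fan the requests out concurrently:
///
/// ```no_run
/// use self_encryption::{bytes::Bytes, get_root_data_map_parallel, DataMap, Error, Result, XorName};
/// use std::collections::HashMap;
///
/// fn main() -> Result<()> {
///     let store: HashMap<XorName, Bytes> = HashMap::new();
///     let get_chunks = |hashes: &[(usize, XorName)]| -> Result<Vec<(usize, Bytes)>> {
///         hashes
///             .iter()
///             .map(|(i, hash)| {
///                 store
///                     .get(hash)
///                     .cloned()
///                     .map(|content| (*i, content))
///                     .ok_or_else(|| Error::Generic(format!("missing chunk: {hash:?}")))
///             })
///             .collect()
///     };
///
///     // A data map normally comes from `encrypt`; an empty one stands in here.
///     let root = get_root_data_map_parallel(DataMap::new(Vec::new()), &get_chunks)?;
///     assert!(!root.is_child());
///     Ok(())
/// }
/// ```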
pub fn get_root_data_map_parallel<F>(data_map: DataMap, get_chunk_parallel: &F) -> Result<DataMap>
where
    F: Fn(&[(usize, XorName)]) -> Result<Vec<(usize, Bytes)>>,
{
    // Create a cache for chunks to avoid redundant retrievals
    let mut chunk_cache = std::collections::HashMap::new();

    fn inner_get_root_map<F>(
        data_map: DataMap,
        get_chunk_parallel: &F,
        chunk_cache: &mut std::collections::HashMap<XorName, Bytes>,
    ) -> Result<DataMap>
    where
        F: Fn(&[(usize, XorName)]) -> Result<Vec<(usize, Bytes)>>,
    {
        // If this is the root data map (no child level), return it
        if !data_map.is_child() {
            return Ok(data_map);
        }

        // Determine which chunks are missing from the cache
        let missing_hashes: Vec<_> = data_map
            .infos()
            .iter()
            .map(|info| (info.index, info.dst_hash))
            .filter(|(_i, hash)| !chunk_cache.contains_key(hash))
            .collect();

        if !missing_hashes.is_empty() {
            let new_chunks = get_chunk_parallel(&missing_hashes)?;
            for ((_i, hash), (_j, chunk_data)) in missing_hashes.iter().zip(new_chunks.into_iter())
            {
                let _ = chunk_cache.insert(*hash, chunk_data);
            }
        }

        let encrypted_chunks: Vec<EncryptedChunk> = data_map
            .infos()
            .iter()
            .map(|info| {
                let content = chunk_cache.get(&info.dst_hash).ok_or_else(|| {
                    Error::Generic(format!("Chunk not found for hash: {:?}", info.dst_hash))
                })?;
                Ok(EncryptedChunk {
                    content: content.clone(),
                })
            })
            .collect::<Result<_>>()?;

        // Decrypt the chunks to get the parent data map bytes
        let decrypted_bytes = decrypt_full_set(&data_map, &encrypted_chunks)?;
        let parent_data_map = test_helpers::deserialise(&decrypted_bytes)
            .map_err(|_| Error::Generic("Failed to deserialize data map".to_string()))?;

        // Recursively get the root data map
        inner_get_root_map(parent_data_map, get_chunk_parallel, chunk_cache)
    }

    // Start the recursive process with our cache
    inner_get_root_map(data_map, get_chunk_parallel, &mut chunk_cache)
}

/// Serializes a data structure using bincode.
///
/// # Arguments
///
/// * `data` - The data structure to serialize, must implement `serde::Serialize`
///
/// # Returns
///
/// * `Result<Vec<u8>>` - The serialized bytes or an error
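///
/// # Example
///
/// A round-trip sketch with a simple serde-compatible value:
///
/// ```
/// use self_encryption::{deserialize, serialize};
///
/// let bytes = serialize(&(42u32, String::from("meta"))).unwrap();
/// let value: (u32, String) = deserialize(&bytes).unwrap();
/// assert_eq!(value, (42, String::from("meta")));
/// ```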
pub fn serialize<T: serde::Serialize>(data: &T) -> Result<Vec<u8>> {
    bincode::serialize(data).map_err(|e| Error::Generic(format!("Serialization error: {e}")))
}

/// Deserializes bytes into a data structure using bincode.
///
/// # Arguments
///
/// * `bytes` - The bytes to deserialize
///
/// # Returns
///
/// * `Result<T>` - The deserialized data structure or an error
pub fn deserialize<T: serde::de::DeserializeOwned>(bytes: &[u8]) -> Result<T> {
    bincode::deserialize(bytes).map_err(|e| Error::Generic(format!("Deserialization error: {e}")))
}

/// Verifies and deserializes a chunk by checking its content hash matches the provided name.
///
/// # Arguments
///
/// * `name` - The expected XorName hash of the chunk content
/// * `bytes` - The serialized chunk content to verify
///
/// # Returns
///
/// * `Result<EncryptedChunk>` - The deserialized chunk if verification succeeds
/// * `Error` - If the content hash doesn't match or deserialization fails
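///
/// # Example
///
/// Verifying a chunk against the name it was stored under:
///
/// ```
/// use self_encryption::{verify_chunk, XorName};
///
/// let content = b"chunk bytes fetched from storage";
/// let name = XorName::from_content(content);
/// assert!(verify_chunk(name, content).is_ok());
/// assert!(verify_chunk(XorName::from_content(b"other"), content).is_err());
/// ```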
pub fn verify_chunk(name: XorName, bytes: &[u8]) -> Result<EncryptedChunk> {
    // Create an EncryptedChunk from the bytes
    let chunk = EncryptedChunk {
        content: Bytes::from(bytes.to_vec()),
    };

    // Calculate the hash of the encrypted content directly
    let calculated_hash = XorName::from_content(chunk.content.as_ref());

    // Verify the hash matches
    if calculated_hash != name {
        return Err(Error::Generic(format!(
            "Chunk content hash mismatch. Expected: {name:?}, Got: {calculated_hash:?}"
        )));
    }

    Ok(chunk)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_helpers::random_bytes;
    use std::{
        collections::HashMap,
        io::Write,
        sync::{Arc, Mutex},
    };
    use tempfile::NamedTempFile;

    // Helper function to create a data map with specified number of chunks
    #[allow(dead_code)]
    fn create_test_data_map(num_chunks: usize) -> Result<DataMap> {
        let bytes = random_bytes(num_chunks * MIN_CHUNK_SIZE);
        let (data_map, _) = encrypt(bytes)?;
        Ok(data_map)
    }

    #[allow(dead_code)]
    fn create_dummy_data_map(num_chunks: usize) -> DataMap {
        let mut chunks = Vec::with_capacity(num_chunks);
        for i in 0..num_chunks {
            chunks.push(ChunkInfo {
                index: i,
                dst_hash: XorName::from_content(&[i as u8]),
                src_hash: XorName::from_content(&[i as u8]),
                src_size: MIN_CHUNK_SIZE,
            });
        }
        DataMap::new(chunks)
    }

    #[test]
    fn test_multiple_levels_of_shrinking() -> Result<()> {
        // Create a temp file with random data
        let bytes = random_bytes(10_000_000);
        let mut temp_file = NamedTempFile::new()?;
        temp_file.write_all(&bytes)?;

        let storage = HashMap::new();
        let storage_clone = Arc::new(Mutex::new(storage));

        let store = move |hash: XorName, content: Bytes| -> Result<()> {
            let _ = storage_clone.lock().unwrap().insert(hash, content.to_vec());
            Ok(())
        };

        // Use standard encryption which supports shrinking
        let (data_map, encrypted_chunks) = encrypt(bytes)?;

        // Store the chunks
        for chunk in &encrypted_chunks {
            store(XorName::from_content(&chunk.content), chunk.content.clone())?;
        }
        assert!(data_map.chunk_identifiers.len() <= 3);

        Ok(())
    }

    #[test]
    fn test_streaming_encrypt_4mb_file() -> Result<()> {
        // Create test data - exactly 4MiB
        let file_size = 4 * 1024 * 1024;
        let bytes = random_bytes(file_size);

        // Create storage for encrypted chunks
        let storage = Arc::new(Mutex::new(HashMap::new()));
        let storage_clone = storage.clone();

        // Store function that also prints chunk info for debugging
        let store = move |hash: XorName, content: Bytes| -> Result<()> {
            println!(
                "Storing chunk: {} (size: {}) at index {}",
                hex::encode(hash),
                content.len(),
                storage_clone.lock().unwrap().len()
            );
            let _ = storage_clone.lock().unwrap().insert(hash, content.to_vec());
            Ok(())
        };

        // First encrypt the data directly to get ALL chunks
        let (data_map, initial_chunks) = encrypt(bytes.clone())?;

        println!("Initial data map has {} chunks", data_map.len());
        println!("Data map child level: {:?}", data_map.child());

        // Start with all initial chunks
        let mut all_chunks = Vec::new();
        all_chunks.extend(initial_chunks);

        // Store all chunks
        for chunk in &all_chunks {
            let hash = XorName::from_content(&chunk.content);
            store(hash, chunk.content.clone())?;
        }

        // Now do a shrink operation
        let mut store_memory = store.clone();
        let (shrunk_map, shrink_chunks) = shrink_data_map(data_map.clone(), &mut store_memory)?;
        println!("Got {} new chunks from shrinking", shrink_chunks.len());

        // Add shrink chunks to our collection
        all_chunks.extend(shrink_chunks);

        println!("\nFinal Data Map Info:");
        println!("Number of chunks: {}", shrunk_map.len());
        println!("Original file size: {file_size}");
        println!("Is child: {}", shrunk_map.is_child());

        for (i, info) in shrunk_map.infos().iter().enumerate() {
            println!(
                "Chunk {}: index={}, src_size={}, src_hash={}, dst_hash={}",
                i,
                info.index,
                info.src_size,
                hex::encode(info.src_hash),
                hex::encode(info.dst_hash)
            );
        }

        // Print all stored chunks
        println!("\nStored Chunks:");
        let stored = storage.lock().unwrap();
        for (hash, content) in stored.iter() {
            println!("Hash: {} (size: {})", hex::encode(hash), content.len());
        }

        // Create output file for decryption
        let output_file = tempfile::NamedTempFile::new()?;

        // Create chunk retrieval function
        let stored_clone = stored.clone();
        let get_chunk = |hash: XorName| -> Result<Bytes> {
            stored_clone
                .get(&hash)
                .map(|data| Bytes::from(data.clone()))
                .ok_or_else(|| Error::Generic(format!("Missing chunk: {}", hex::encode(hash))))
        };

        // Decrypt using decrypt_from_storage
        decrypt_from_storage(&shrunk_map, output_file.path(), get_chunk)?;

        // Read and verify the decrypted data
        let mut decrypted = Vec::new();
        let _ = output_file.as_file().read_to_end(&mut decrypted)?;

        assert_eq!(decrypted.len(), file_size);
        assert_eq!(&decrypted[..], &bytes[..]);

        Ok(())
    }
}