ng_repo/
file.rs

// Copyright (c) 2022-2025 Niko Bonnieure, Par le Peuple, NextGraph.org developers
// All rights reserved.
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE2 or http://www.apache.org/licenses/LICENSE-2.0>
// or the MIT license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.

//! SmallFile and RandomAccessFile objects

use core::fmt;
use std::cmp::min;
use std::collections::HashMap;
use std::sync::Arc;

use chacha20::cipher::{KeyIvInit, StreamCipher};
use chacha20::ChaCha20;
use zeroize::Zeroize;

use crate::block_storage::*;
use crate::errors::*;
#[allow(unused_imports)]
use crate::log::*;
use crate::object::*;
use crate::store::Store;
use crate::types::*;

/// File errors
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum FileError {
    /// Missing blocks
    MissingBlocks(Vec<BlockId>),
    /// Missing root key
    MissingRootKey,
    /// Invalid BlockId encountered in the tree
    InvalidBlockId,
    /// Too many or too few children of a block
    InvalidChildren,
    /// Number of keys does not match number of children of a block
    InvalidKeys,
    /// Invalid CommitHeader object content
    InvalidHeader,
    /// Error deserializing content of a block
    BlockDeserializeError,
    /// Error deserializing content of the RandomAccessFileMeta
    MetaDeserializeError,
    /// Files are immutable: this one was already saved once and cannot be modified. Create a new File for your new data (and delete the old one if needed)
    AlreadySaved,
    /// File is too big
    TooBig,
    NotFound,
    StorageError,
    EndOfFile,
    InvalidArgument,
    NotAFile,
}

impl From<StorageError> for FileError {
    fn from(e: StorageError) -> Self {
        match e {
            StorageError::NotFound => FileError::NotFound,
            _ => FileError::StorageError,
        }
    }
}

impl From<ObjectParseError> for FileError {
    fn from(_e: ObjectParseError) -> Self {
        FileError::BlockDeserializeError
    }
}

pub trait ReadFile {
    fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError>;

    fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError>;
}

/// A File in memory (read access only)
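///
/// # Example
///
/// A minimal sketch of opening and reading (hypothetical `id`, `key` and
/// `store` values; marked `ignore` because it is not a runnable doc-test):
///
/// ```ignore
/// let file = File::open(id, key, Arc::clone(&store))?;
/// // a SmallFile returns exactly the requested range,
/// // a RandomAccessFile returns at most one block
/// let bytes = file.read(0, 100)?;
/// ```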
pub struct File<'a> {
    internal: Box<dyn ReadFile + 'a>,
    blocks_ids: Vec<BlockId>,
}

impl<'a> File<'a> {
    pub fn open(id: ObjectId, key: SymKey, store: Arc<Store>) -> Result<File<'a>, FileError> {
        let root_block = store.get(&id)?;

        if root_block.children().len() == 2
            && *root_block.content().commit_header_obj() == CommitHeaderObject::RandomAccess
        {
            Ok(File {
                internal: Box::new(RandomAccessFile::open(id, key, store)?),
                blocks_ids: vec![],
            })
        } else {
            let obj = Object::load(id, Some(key), &store)?;
            match obj.content_v0()? {
                ObjectContentV0::SmallFile(small_file) => Ok(File {
                    internal: Box::new(small_file),
                    blocks_ids: obj.block_ids(),
                }),
                _ => Err(FileError::NotAFile),
            }
        }
    }
}

impl<'a> ReadFile for File<'a> {
    fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError> {
        self.internal.read(pos, size)
    }
    fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError> {
        if self.blocks_ids.len() > 0 {
            Ok(self.blocks_ids.to_vec())
        } else {
            self.internal.get_all_blocks_ids()
        }
    }
}

impl ReadFile for SmallFile {
    fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError> {
        match self {
            Self::V0(v0) => v0.read(pos, size),
        }
    }
    fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError> {
        unimplemented!();
    }
}

impl ReadFile for SmallFileV0 {
    fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError> {
        if size == 0 {
            return Err(FileError::InvalidArgument);
        }
        if pos + size > self.content.len() {
            return Err(FileError::EndOfFile);
        }
        Ok(self.content[pos..pos + size].to_vec())
    }
    fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError> {
        unimplemented!();
    }
}

/// A RandomAccessFile in memory. This is not used to serialize data
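///
/// A minimal sketch of the write-then-read round trip (assumes a `store`
/// is at hand; marked `ignore` because it is not a runnable doc-test):
///
/// ```ignore
/// let mut file = RandomAccessFile::new_empty(block_size, "text/plain".to_string(), vec![], Arc::clone(&store));
/// file.write(b"hello")?;
/// let id = file.save()?;
/// let same = RandomAccessFile::open(id, file.key().clone().unwrap(), store)?;
/// ```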
pub struct RandomAccessFile {
    //storage: Arc<&'a dyn BlockStorage>,
    store: Arc<Store>,
    /// accurate once saved or opened
    meta: RandomAccessFileMeta,

    //meta_object_id: Option<BlockId>,
    //content_block_id: Option<BlockId>,
    /// keeps the deduplicated blocks' IDs, used for async writes
    block_contents: HashMap<BlockKey, BlockId>,

    /// Blocks of the Object (nodes of the tree). Only used when writing asynchronously, before saving.
    blocks: Vec<(BlockId, BlockKey)>,

    /// When an id is present, the File is opened in Read mode, and cannot be saved.
    id: Option<ObjectId>,
    key: Option<ObjectKey>,

    content_block: Option<(BlockId, BlockKey)>,

    // used for writes
    conv_key: Option<[u8; 32]>,
    remainder: Vec<u8>,
    size: usize,
}
impl ReadFile for RandomAccessFile {
    fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError> {
        if self.id.is_none() {
            unimplemented!();
        }
        let mut res = Vec::with_capacity(4);
        for (id, _) in self.blocks.iter() {
            res.push(id.clone());
        }

        recurse_tree(
            &self.store,
            self.content_block.as_ref().unwrap().clone(),
            &mut res,
            self.meta.depth(),
        )?;

        fn recurse_tree(
            store: &Store,
            current_block_id_key: (Digest, SymKey),
            res: &mut Vec<Digest>,
            level: u8,
        ) -> Result<(), FileError> {
            res.push(current_block_id_key.0);
            if level > 0 {
                let tree_block = store.get(&current_block_id_key.0)?;
                let (children, content) = tree_block.read(&current_block_id_key.1)?;
                if children.is_empty() || content.len() > 0 {
                    return Err(FileError::BlockDeserializeError);
                }

                for child in children {
                    recurse_tree(store, child, res, level - 1)?;
                }
            }
            Ok(())
        }
        Ok(res)
    }

    /// Reads at most one block from the file. The returned vector should be tested for size: it might be smaller than what you asked for.
    /// The `pos`ition can be anywhere in the file.
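    ///
    /// A sketch of reading a whole saved file by calling `read` repeatedly
    /// (hypothetical `file` value; marked `ignore` because it is not a
    /// runnable doc-test):
    ///
    /// ```ignore
    /// let total = file.meta().total_size() as usize;
    /// let mut content: Vec<u8> = Vec::with_capacity(total);
    /// let mut pos = 0;
    /// while pos < total {
    ///     let chunk = file.read(pos, total - pos)?; // at most one block
    ///     pos += chunk.len();
    ///     content.extend_from_slice(&chunk);
    /// }
    /// ```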
    //TODO: parallelize decryption on multi threads (cores)
    fn read(&self, pos: usize, mut size: usize) -> Result<Vec<u8>, FileError> {
        if size == 0 {
            return Err(FileError::InvalidArgument);
        }
        if self.id.is_some() {
            let total = self.meta.total_size() as usize;
            if pos > total {
                return Err(FileError::EndOfFile);
            }
            size = min(total - pos, size);
            let mut current_block_id_key = self.content_block.as_ref().unwrap().clone();

            let depth = self.meta.depth();
            let arity = self.meta.arity();

            let mut level_pos = pos;
            for level in 0..depth {
                let tree_block = self.store.get(&current_block_id_key.0)?;
                let (children, content) = tree_block.read(&current_block_id_key.1)?;
                if children.is_empty() || content.len() > 0 {
                    return Err(FileError::BlockDeserializeError);
                }
                let factor = (arity as usize).pow(depth as u32 - level as u32 - 1)
                    * self.meta.chunk_size() as usize;
                // the index within the current node is based on the position
                // within this subtree (level_pos), and the remainder is kept
                // for the next level down
                let level_index = level_pos / factor;
                if level_index >= children.len() {
                    return Err(FileError::EndOfFile);
                }
                current_block_id_key = (children[level_index]).clone();
                level_pos %= factor;
            }

            let content_block = self.store.get(&current_block_id_key.0)?;
            //log_debug!("CONTENT BLOCK SIZE {}", content_block.size());

            let (children, content) = content_block.read(&current_block_id_key.1)?;

            if children.is_empty() && content.len() > 0 {
                //log_debug!("CONTENT SIZE {}", content.len());

                if level_pos >= content.len() {
                    return Err(FileError::EndOfFile);
                }
                let end = min(content.len(), level_pos + size);
                return Ok(content[level_pos..end].to_vec());
            } else {
                return Err(FileError::BlockDeserializeError);
            }
        } else {
            // hasn't been saved yet, we can use the self.blocks as a flat array and the remainder too
            let factor = self.meta.chunk_size() as usize;
            let index = pos / factor;
            let level_pos = pos % factor;
            let remainder_pos = self.blocks.len() * factor;
            if pos >= remainder_pos {
                let pos_in_remainder = pos - remainder_pos;
                if self.remainder.len() > 0 && pos_in_remainder < self.remainder.len() {
                    let end = min(self.remainder.len(), pos_in_remainder + size);
                    return Ok(self.remainder[pos_in_remainder..end].to_vec());
                } else {
                    return Err(FileError::EndOfFile);
                }
            }
            //log_debug!("{} {} {} {}", index, self.blocks.len(), factor, level_pos);
            if index >= self.blocks.len() {
                return Err(FileError::EndOfFile);
            }
            let block = &self.blocks[index];
            let content_block = self.store.get(&block.0)?;
            let (children, content) = content_block.read(&block.1)?;
            if children.is_empty() && content.len() > 0 {
                //log_debug!("CONTENT SIZE {}", content.len());

                if level_pos >= content.len() {
                    return Err(FileError::EndOfFile);
                }
                let end = min(content.len(), level_pos + size);
                return Ok(content[level_pos..end].to_vec());
            } else {
                return Err(FileError::BlockDeserializeError);
            }
        }
    }
}

impl RandomAccessFile {
    pub fn meta(&self) -> &RandomAccessFileMeta {
        &self.meta
    }

    pub fn id(&self) -> &Option<ObjectId> {
        &self.id
    }

    pub fn key(&self) -> &Option<ObjectKey> {
        &self.key
    }

    fn make_block(
        mut content: Vec<u8>,
        conv_key: &[u8; blake3::OUT_LEN],
        children: Vec<ObjectId>,
        already_existing: &mut HashMap<BlockKey, BlockId>,
        store: &Store,
    ) -> Result<(BlockId, BlockKey), StorageError> {
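        // Convergent encryption, as implemented below: the block key is a keyed
        // BLAKE3 hash of the plaintext, so identical content encrypted under the
        // same convergence key always yields the same (id, key) pair and is
        // deduplicated via `already_existing`; a constant nonce is then safe
        // because each derived key encrypts exactly one plaintext.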
        let key_hash = blake3::keyed_hash(conv_key, &content);

        let key_slice = key_hash.as_bytes();
        let key = SymKey::ChaCha20Key(key_slice.clone());
        if let Some(id) = already_existing.get(&key) {
            return Ok((*id, key));
        }
        let nonce = [0u8; 12];
        let mut cipher = ChaCha20::new(key_slice.into(), &nonce.into());
        cipher.apply_keystream(content.as_mut_slice());

        let mut block = Block::new_random_access(children, content, None);
        //log_debug!(">>> make_block random access: {}", block.id());
        //log_debug!("!! children: ({}) {:?}", children.len(), children);

        let id = block.get_and_save_id();
        already_existing.insert(key.clone(), id);
        //log_debug!("putting *** {}", id);
        store.put(&block)?;
        Ok((id, key))
    }

    fn make_parent_block(
        conv_key: &[u8; blake3::OUT_LEN],
        children: Vec<(BlockId, BlockKey)>,
        already_existing: &mut HashMap<BlockKey, BlockId>,
        store: &Store,
    ) -> Result<(BlockId, BlockKey), StorageError> {
        let mut ids: Vec<BlockId> = Vec::with_capacity(children.len());
        let mut keys: Vec<BlockKey> = Vec::with_capacity(children.len());
        children.iter().for_each(|child| {
            ids.push(child.0);
            keys.push(child.1.clone());
        });
        let content = ChunkContentV0::InternalNode(keys);
        let content_ser = serde_bare::to_vec(&content).unwrap();

        Self::make_block(content_ser, conv_key, ids, already_existing, store)
    }

    /// Builds the tree from the leaves, returns the root node
    fn make_tree(
        already_existing: &mut HashMap<BlockKey, BlockId>,
        leaves: &[(BlockId, BlockKey)],
        conv_key: &ChaCha20Key,
        arity: u16,
        store: &Store,
    ) -> Result<(BlockId, BlockKey), StorageError> {
        let mut parents: Vec<(BlockId, BlockKey)> = vec![];
        for nodes in leaves.chunks(arity as usize) {
            //log_debug!("making parent");
            parents.push(Self::make_parent_block(
                conv_key,
                nodes.to_vec(),
                already_existing,
                store,
            )?);
        }
        //log_debug!("level with {} parents", parents.len());

        if parents.len() > 1 {
            return Self::make_tree(already_existing, parents.as_slice(), conv_key, arity, store);
        }
        Ok(parents[0].clone())
    }

    /// returns content_block id/key pair, and root_block id/key pair
    fn save_(
        already_existing: &mut HashMap<BlockKey, BlockId>,
        blocks: &[(BlockId, BlockKey)],
        meta: &mut RandomAccessFileMeta,
        conv_key: &ChaCha20Key,
        store: &Store,
    ) -> Result<((BlockId, BlockKey), (BlockId, BlockKey)), FileError> {
        let leaf_blocks_nbr = blocks.len();
        let arity = meta.arity();

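        // The depth is the smallest d such that arity^d leaves suffice, i.e.
        // ceil(log_arity(leaf_blocks_nbr)). For example, with an arity of 61:
        // 1 leaf -> depth 0, up to 61 leaves -> depth 1, up to 3721 -> depth 2.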
        let mut depth: u8 = u8::MAX;
        for i in 0..u8::MAX {
            if leaf_blocks_nbr <= (arity as usize).pow(i.into()) {
                depth = i;
                break;
            }
        }
        if depth == u8::MAX {
            return Err(FileError::TooBig);
        }
        meta.set_depth(depth);
        //log_debug!("depth={} leaves={}", depth, leaf_blocks_nbr);

        let content_block = if depth == 0 {
            assert!(blocks.len() == 1);
            blocks[0].clone()
        } else {
            // we create the tree
            Self::make_tree(already_existing, &blocks, &conv_key, arity, store)?
        };

        let meta_object = Object::new_with_convergence_key(
            ObjectContent::V0(ObjectContentV0::RandomAccessFileMeta(meta.clone())),
            None,
            store_valid_value_size(meta.chunk_size() as usize),
            conv_key,
        );
        //log_debug!("saving meta object");
        _ = meta_object.save(store)?;

        // creating the root block that contains as first child the meta_object, and as second child the content_block
        // it is added to storage in make_parent_block
        //log_debug!("saving root block");
        let root_block = Self::make_parent_block(
            conv_key,
            vec![
                (meta_object.id(), meta_object.key().unwrap()),
                content_block.clone(),
            ],
            already_existing,
            store,
        )?;
        Ok((content_block, root_block))
    }

    /// Creates a new file based on a content that is fully known at the time of creation.
    ///
    /// If you want to stream the content progressively into the new file, use new_empty(), write() and save() instead.
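    ///
    /// A minimal sketch (hypothetical `content` and `store` values; marked
    /// `ignore` because it is not a runnable doc-test):
    ///
    /// ```ignore
    /// let file = RandomAccessFile::new_from_slice(
    ///     &content,
    ///     store_max_value_size(),
    ///     "text/plain".to_string(),
    ///     vec![],
    ///     Arc::clone(&store),
    /// )?;
    /// let id = file.id().unwrap(); // already saved: id and key are set
    /// ```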
    pub fn new_from_slice(
        content: &[u8],
        block_size: usize,
        content_type: String,
        metadata: Vec<u8>,
        store: Arc<Store>,
    ) -> Result<RandomAccessFile, FileError> {
        //let max_block_size = store_max_value_size();
        let valid_block_size = store_valid_value_size(block_size) - BLOCK_EXTRA;

        let arity = ((valid_block_size) / CHILD_SIZE) as u16;

        let total_size = content.len() as u64;

        let mut conv_key = Object::convergence_key(&store);

        let mut blocks: Vec<(BlockId, BlockKey)> = vec![];

        let mut already_existing: HashMap<BlockKey, BlockId> = HashMap::new();

        //log_debug!("making the leaves");
        for chunk in content.chunks(valid_block_size) {
            let data_chunk = ChunkContentV0::DataChunk(chunk.to_vec());
            let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
            blocks.push(Self::make_block(
                content_ser,
                &conv_key,
                vec![],
                &mut already_existing,
                &store,
            )?);
        }
        assert_eq!(
            (total_size as usize + valid_block_size - 1) / valid_block_size,
            blocks.len()
        );

        let mut meta = RandomAccessFileMeta::V0(RandomAccessFileMetaV0 {
            content_type,
            metadata,
            chunk_size: valid_block_size as u32,
            total_size,
            arity,
            depth: 0,
        });

        let (content_block, root_block) =
            Self::save_(&mut already_existing, &blocks, &mut meta, &conv_key, &store)?;

        conv_key.zeroize();

        Ok(Self {
            store,
            meta,
            block_contents: HashMap::new(), // not used in this case
            blocks: vec![],                 // not used in this case
            id: Some(root_block.0.clone()),
            key: Some(root_block.1.clone()),
            content_block: Some(content_block),
            conv_key: None,    // not used in this case
            remainder: vec![], // not used in this case
            size: 0,           // not used in this case
        })
    }

    pub fn new_empty(
        block_size: usize,
        content_type: String,
        metadata: Vec<u8>,
        store: Arc<Store>,
    ) -> Self {
        let valid_block_size = store_valid_value_size(block_size) - BLOCK_EXTRA;

        let arity = ((valid_block_size) / CHILD_SIZE) as u16;

        let meta = RandomAccessFileMeta::V0(RandomAccessFileMetaV0 {
            content_type,
            metadata,
            chunk_size: valid_block_size as u32,
            arity,
            total_size: 0, // will be filled in later, during save
            depth: 0,      // will be filled in later, during save
        });

        Self {
            store: Arc::clone(&store),
            meta,
            block_contents: HashMap::new(),
            blocks: vec![],
            id: None,
            key: None,
            content_block: None,
            conv_key: Some(Object::convergence_key(&store)),
            remainder: vec![],
            size: 0,
        }
    }

    /// Appends some data at the end of the file currently created with new_empty() and not saved yet.
    /// You can call it many times. Don't forget to eventually call save().
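    ///
    /// A sketch of streaming content in chunks (hypothetical `data`, `store`,
    /// `block_size` and `content_type` values; marked `ignore` because it is
    /// not a runnable doc-test):
    ///
    /// ```ignore
    /// let mut file = RandomAccessFile::new_empty(block_size, content_type, vec![], store);
    /// for chunk in data.chunks(1000) {
    ///     file.write(chunk)?; // partial chunks accumulate in the remainder
    /// }
    /// let id = file.save()?; // flushes the remainder and builds the tree
    /// ```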
    pub fn write(&mut self, data: &[u8]) -> Result<(), FileError> {
        if self.id.is_some() {
            return Err(FileError::AlreadySaved);
        }
        let remainder = self.remainder.len();
        let chunk_size = self.meta.chunk_size() as usize;
        let mut pos: usize = 0;
        let conv_key = self.conv_key.unwrap();
        // TODO: provide an option to search in storage for already existing, when doing a resume of previously aborted write
        let mut already_existing: HashMap<BlockKey, BlockId> = HashMap::new();

        if remainder > 0 {
            if data.len() >= chunk_size - remainder {
                let mut new_block = Vec::with_capacity(chunk_size);
                new_block.append(&mut self.remainder);
                pos = chunk_size - remainder;
                self.size += chunk_size;
                //log_debug!("size += chunk_size {} {}", self.size, chunk_size);
                new_block.extend(data[0..pos].iter());
                assert_eq!(new_block.len(), chunk_size);
                let data_chunk = ChunkContentV0::DataChunk(new_block);
                let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
                self.blocks.push(Self::make_block(
                    content_ser,
                    &conv_key,
                    vec![],
                    &mut already_existing,
                    &self.store,
                )?);
            } else {
                // not enough data to create a new block
                self.remainder.extend(data.iter());
                return Ok(());
            }
        } else if data.len() < chunk_size {
            self.remainder = Vec::from(data);
            return Ok(());
        }

        for chunk in data[pos..].chunks(chunk_size) {
            if chunk.len() == chunk_size {
                self.size += chunk_size;
                //log_debug!("size += chunk_size {} {}", self.size, chunk_size);
                let data_chunk = ChunkContentV0::DataChunk(chunk.to_vec());
                let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
                self.blocks.push(Self::make_block(
                    content_ser,
                    &conv_key,
                    vec![],
                    &mut already_existing,
                    &self.store,
                )?);
            } else {
                self.remainder = Vec::from(chunk);
                return Ok(());
            }
        }
        Ok(())
    }

    pub fn save(&mut self) -> Result<ObjectId, FileError> {
        if self.id.is_some() {
            return Err(FileError::AlreadySaved);
        }
        // save the remainder, if any.
        if self.remainder.len() > 0 {
            self.size += self.remainder.len();
            //log_debug!("size += remainder {} {}", self.size, self.remainder.len());
            let mut remainder = Vec::with_capacity(self.remainder.len());
            remainder.append(&mut self.remainder);
            let data_chunk = ChunkContentV0::DataChunk(remainder);
            let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
            self.blocks.push(Self::make_block(
                content_ser,
                &self.conv_key.unwrap(),
                vec![],
                &mut HashMap::new(),
                &self.store,
            )?);
        }

        self.meta.set_total_size(self.size as u64);

        let mut already_existing: HashMap<BlockKey, BlockId> = HashMap::new();
        let (content_block, root_block) = Self::save_(
            &mut already_existing,
            &self.blocks,
            &mut self.meta,
            self.conv_key.as_ref().unwrap(),
            &self.store,
        )?;

        self.conv_key.as_mut().unwrap().zeroize();
        self.conv_key = None;

        self.id = Some(root_block.0);
        self.key = Some(root_block.1.clone());
        self.content_block = Some(content_block);

        self.blocks = vec![];
        self.blocks.shrink_to_fit();

        Ok(root_block.0)
    }

    pub fn reference(&self) -> Option<ObjectRef> {
        if self.key.is_some() && self.id.is_some() {
            Some(ObjectRef::from_id_key(
                self.id.unwrap(),
                self.key.as_ref().unwrap().clone(),
            ))
        } else {
            None
        }
    }

    /// Opens a file for reading.
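    ///
    /// A sketch of re-opening a previously saved file (hypothetical `id`,
    /// `key` and `store` values; marked `ignore` because it is not a
    /// runnable doc-test):
    ///
    /// ```ignore
    /// let file = RandomAccessFile::open(id, key, store)?;
    /// let head = file.read(0, file.meta().chunk_size() as usize)?;
    /// ```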
    pub fn open(
        id: ObjectId,
        key: SymKey,
        store: Arc<Store>,
    ) -> Result<RandomAccessFile, FileError> {
        // load root block
        let root_block = store.get(&id)?;

        if root_block.children().len() != 2
            || *root_block.content().commit_header_obj() != CommitHeaderObject::RandomAccess
        {
            return Err(FileError::BlockDeserializeError);
        }

        let (root_sub_blocks, _) = root_block.read(&key)?;

        // load meta object (first one in root block)
        let meta_object = Object::load(
            root_sub_blocks[0].0,
            Some(root_sub_blocks[0].1.clone()),
            &store,
        )?;

        let meta = match meta_object.content_v0()? {
            ObjectContentV0::RandomAccessFileMeta(meta) => meta,
            _ => return Err(FileError::InvalidChildren),
        };

        Ok(RandomAccessFile {
            store,
            meta,
            block_contents: HashMap::new(), // not used in this case
            blocks: vec![(id, SymKey::nil()), (root_sub_blocks[0].0, SymKey::nil())], // not used in this case
            id: Some(id),
            key: Some(key),
            content_block: Some(root_sub_blocks[1].clone()),
            conv_key: None,
            remainder: vec![],
            size: 0,
        })
    }

    pub fn blocks(&self) -> impl Iterator<Item = Block> + '_ {
        self.blocks
            .iter()
            .map(|key| self.store.get(&key.0).unwrap())
    }

    /// Size once encoded, before deduplication. Only available before save()
    pub fn size(&self) -> usize {
        let mut total = 0;
        self.blocks().for_each(|b| total += b.size());
        total
    }

    /// Real size on disk
    pub fn dedup_size(&self) -> usize {
        let mut total = 0;
        self.block_contents
            .values()
            .for_each(|b| total += self.store.get(b).unwrap().size());
        total
    }

    pub fn depth(&self) -> Result<u8, NgError> {
        Ok(self.meta.depth())

        // unimplemented!();
        // if self.key().is_none() {
        //     return Err(ObjectParseError::MissingRootKey);
        // }
        // let parents = vec![(self.id(), self.key().unwrap())];
        // Self::collect_leaves(
        //     &self.blocks,
        //     &parents,
        //     self.blocks.len() - 1,
        //     &mut None,
        //     &mut None,
        //     &self.block_contents,
        // )
    }
}

impl fmt::Display for RandomAccessFile {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(
            f,
            "====== File ID {}",
            self.id
                .map_or("NOT SAVED".to_string(), |i| format!("{}", i))
        )?;
        writeln!(
            f,
            "== Key:    {}",
            self.key
                .as_ref()
                .map_or("None".to_string(), |k| format!("{}", k))
        )?;
        writeln!(f, "== depth:        {}", self.meta.depth())?;
        writeln!(f, "== arity:        {}", self.meta.arity())?;
        writeln!(f, "== chunk_size:   {}", self.meta.chunk_size())?;
        writeln!(f, "== total_size:   {}", self.meta.total_size())?;
        writeln!(f, "== content_type: {}", self.meta.content_type())?;
        writeln!(f, "== metadata len: {}", self.meta.metadata().len())?;
        if self.id.is_none() {
            writeln!(f, "== blocks to save: {}", self.blocks.len())?;
        }
        Ok(())
    }
}

#[cfg(test)]
mod test {

    use crate::file::*;
    use std::io::BufReader;
    use std::io::Read;

    /// Checks that a content that fits in one block creates a depth of 0
    #[test]
    pub fn test_depth_0() {
        let block_size = store_max_value_size();
        //store_valid_value_size(0)

        ////// 1 MB of data!
        let data_size = block_size - BLOCK_EXTRA;

        let store = Store::dummy_public_v0();
        log_debug!("creating 1MB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating random access file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            block_size,
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let id = file.id.as_ref().unwrap().clone();

        let file_size = file.size();
        log_debug!("file size to save : {}", file_size);

        log_debug!("data size: {}", data_size);

        let read_content = file.read(0, data_size).expect("reading all");
        assert_eq!(read_content, content);

        let read_content2 = file.read(0, data_size + 1);
        assert_eq!(read_content2.unwrap().len(), 1048564);

        let read_content = file.read(data_size - 9, 9).expect("reading end");
        assert_eq!(read_content, vec![99, 99, 99, 99, 99, 99, 99, 99, 99]);

        let read_content = file.read(data_size - 9, 10);
        assert_eq!(read_content, Ok(vec![99, 99, 99, 99, 99, 99, 99, 99, 99]));

        // log_debug!(
        //     "overhead: {} - {}%",
        //     file_size - data_size,
        //     ((file_size - data_size) * 100) as f32 / data_size as f32
        // );

        // let dedup_size = file.dedup_size();
        // log_debug!(
        //     "dedup compression: {} - {}%",
        //     data_size - dedup_size,
        //     ((data_size - dedup_size) * 100) as f32 / data_size as f32
        // );

        // log_debug!("number of blocks : {}", file.blocks.len());
        // assert_eq!(
        //     file.blocks.len(),
        //     MAX_ARITY_LEAVES * (MAX_ARITY_LEAVES + 1) * MAX_ARITY_LEAVES + MAX_ARITY_LEAVES + 1
        // );
        assert_eq!(file.depth(), Ok(0));
        assert_eq!(store.len(), Ok(3));

        let file = RandomAccessFile::open(id, file.key.unwrap(), store).expect("re open");

        log_debug!("{}", file);

        let read_content = file.read(0, data_size).expect("reading all after re open");
        assert_eq!(read_content, content);
    }

    /// Checks that a content that exactly fills the children of the first level of the tree creates a depth of 1
    #[ignore]
    #[test]
    pub fn test_depth_1() {
        const MAX_ARITY_LEAVES: usize = 15887;
        const MAX_DATA_PAYLOAD_SIZE: usize = 1048564;

        ////// 16 GB of data!
        let data_size = MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE;

        let store = Store::dummy_public_v0();
        log_debug!("creating 16GB of data");

        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating random access file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_max_value_size(),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let _id = file.id.as_ref().unwrap().clone();

        log_debug!("data size: {}", data_size);

        assert_eq!(file.depth(), Ok(1));

        assert_eq!(store.len(), Ok(4));
    }

    /// Checks that a content that doesn't fit in the children of the first level of the tree creates a depth of 2
    #[ignore]
    #[test]
    pub fn test_depth_2() {
        const MAX_ARITY_LEAVES: usize = 15887;
        const MAX_DATA_PAYLOAD_SIZE: usize = 1048564;

        ////// 16 GB of data!
        let data_size = MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE + 1;

        let store = Store::dummy_public_v0();
        log_debug!("creating 16GB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_max_value_size(),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let file_size = file.size();
        log_debug!("file size: {}", file_size);

        log_debug!("data size: {}", data_size);

        assert_eq!(file.depth().unwrap(), 2);

        assert_eq!(store.len(), Ok(7));
    }

    /// Checks that a content that fills three levels of leaves creates a depth of 3
    #[test]
    pub fn test_depth_3() {
        const MAX_ARITY_LEAVES: usize = 61;
        const MAX_DATA_PAYLOAD_SIZE: usize = 4084;

        ////// 900 MB of data!
        let data_size =
            MAX_ARITY_LEAVES * MAX_ARITY_LEAVES * MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE;

        let store = Store::dummy_public_v0();
        log_debug!("creating 900MB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_valid_value_size(0),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let file_size = file.size();
        log_debug!("file size: {}", file_size);

        let read_content = file.read(0, data_size).expect("reading all");
        assert_eq!(read_content.len(), MAX_DATA_PAYLOAD_SIZE);

        let read_content = file.read(9000, 10000).expect("reading 10k");
        assert_eq!(read_content, vec![99; 3252]);

        // log_debug!("data size: {}", data_size);
        // log_debug!(
        //     "overhead: {} - {}%",
        //     file_size - data_size,
        //     ((file_size - data_size) * 100) as f32 / data_size as f32
        // );

        // let dedup_size = file.dedup_size();
        // log_debug!(
        //     "dedup compression: {} - {}%",
        //     data_size - dedup_size,
        //     ((data_size - dedup_size) * 100) as f32 / data_size as f32
        // );

        // log_debug!("number of blocks : {}", file.blocks.len());
        // assert_eq!(
        //     file.blocks.len(),
        //     MAX_ARITY_LEAVES * (MAX_ARITY_LEAVES + 1) * MAX_ARITY_LEAVES + MAX_ARITY_LEAVES + 1
        // );
        assert_eq!(file.depth().unwrap(), 3);

        assert_eq!(store.len(), Ok(6));
    }

    /// Checks that a content that fills four levels of leaves creates a depth of 4
    #[ignore]
    #[test]
    pub fn test_depth_4() {
        const MAX_ARITY_LEAVES: usize = 61;
        const MAX_DATA_PAYLOAD_SIZE: usize = 4084;

        ////// 52GB of data!
        let data_size = MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_DATA_PAYLOAD_SIZE;

        let store = Store::dummy_public_v0();
1007        log_debug!("creating 55GB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_valid_value_size(0),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");

        log_debug!("{}", file);

        let file_size = file.size();
        log_debug!("file size: {}", file_size);

        log_debug!("data size: {}", data_size);

        assert_eq!(file.depth().unwrap(), 4);

        assert_eq!(store.len(), Ok(7));
    }

    /// Test async write to a file all at once
    #[test]
    pub fn test_write_all_at_once() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_max_value_size(), //store_valid_value_size(0),//
            "image/jpeg".to_string(),
            vec![],
            store,
        );

        log_debug!("{}", file);

        file.write(&img_buffer).expect("write all at once");

        // !!! all those tests work only because store_max_value_size() is bigger than the actual size of the JPEG file. so it fits in one block.

        assert_eq!(
            file.read(0, img_buffer.len()).expect("read before save"),
            img_buffer
        );

        // asking too much, receiving just enough
        assert_eq!(
            file.read(0, img_buffer.len() + 1)
                .expect("read before save"),
            img_buffer
        );

        // // reading too far, well behind the size of the JPG
        // assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));

        file.save().expect("save");

        let res = file.read(0, img_buffer.len()).expect("read all");
        assert_eq!(res, img_buffer);

        // // asking too much, receiving an error, as now we know the total size of file, and we check it
        // assert_eq!(
        //     file.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
    }

    /// Test async write to a file by increments
    #[test]
    pub fn test_write_by_increments() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_max_value_size(), //store_valid_value_size(0),//
            "image/jpeg".to_string(),
            vec![],
            store,
        );

        log_debug!("{}", file);

        for chunk in img_buffer.chunks(1000) {
            file.write(chunk).expect("write a chunk");
        }

        assert_eq!(
            file.read(0, img_buffer.len()).expect("read before save"),
            img_buffer
        );

        // asking too much, receiving just enough
        assert_eq!(
            file.read(0, img_buffer.len() + 1)
                .expect("read before save"),
            img_buffer
        );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);

        // reading one byte after the end of the file size.
        assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));

        file.save().expect("save");

        // this works only because store_max_value_size() is bigger than the actual size of the JPEG file. so it fits in one block.
        let res = file.read(0, img_buffer.len()).expect("read all");

        assert_eq!(res, img_buffer);

        // // asking too much, receiving an error, as now we know the total size of file, and we check it
        // assert_eq!(
        //     file.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
    }

    /// Test async write to a file by increments, with small blocks
    #[test]
    pub fn test_write_by_increments_small_blocks() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "image/jpeg".to_string(),
            vec![],
            store,
        );

        log_debug!("{}", file);

        let first_block_content = img_buffer[0..4084].to_vec();

        for chunk in img_buffer.chunks(1000) {
            file.write(chunk).expect("write a chunk");
        }

        log_debug!("{}", file);

        assert_eq!(
            file.read(0, img_buffer.len()).expect("read before save"),
            first_block_content
        );

        // asking too much, receiving just enough
        assert_eq!(
            file.read(0, img_buffer.len() + 1)
                .expect("read before save"),
            first_block_content
        );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));

        file.save().expect("save");

        log_debug!("{}", file);

        assert_eq!(img_buffer.len(), file.meta.total_size() as usize);

        let res = file.read(0, img_buffer.len()).expect("read all");
        assert_eq!(res, first_block_content);

        // // asking too much, not receiving an error, as we know the total size of file, and return what we can
        // assert_eq!(
        //     file.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
    }

    /// Test async write to a file all at once, with small blocks
    #[test]
    pub fn test_write_all_at_once_small_blocks() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let first_block_content = img_buffer[0..4084].to_vec();

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "image/jpeg".to_string(),
            vec![],
            store,
        );

        log_debug!("{}", file);

        file.write(&img_buffer).expect("write all at once");

        assert_eq!(
            file.read(0, img_buffer.len()).expect("read before save"),
            first_block_content
        );

        // asking too much, receiving just enough
        assert_eq!(
            file.read(0, img_buffer.len() + 1)
                .expect("read before save"),
            first_block_content
        );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));

        file.save().expect("save");

        let res = file.read(0, img_buffer.len()).expect("read all");
        assert_eq!(res, first_block_content);

        let res = file.read(10, img_buffer.len() - 10).expect("read all");
        assert_eq!(res, first_block_content[10..].to_vec());

        // // asking too much, receiving an error, as now we know the total size of file, and we check it
        // assert_eq!(
        //     file.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
    }

    /// Test depth 4 with 52GB of data, but using write in small increments, so the memory burden on the system will be minimal
    #[ignore]
    #[test]
    pub fn test_depth_4_write_small() {
        const MAX_ARITY_LEAVES: usize = 61;
        const MAX_DATA_PAYLOAD_SIZE: usize = 4084;

        ////// 52GB of data!
        let data_size = MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_DATA_PAYLOAD_SIZE;

        // chunks of 5 MB
        let chunk_nbr = data_size / 5000000;
        let last_chunk = data_size % 5000000;

        let store = Store::dummy_public_v0();

        log_debug!("creating empty file");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "image/jpeg".to_string(),
            vec![],
            Arc::clone(&store),
        );

        log_debug!("{}", file);

        let chunk = vec![99; 5000000];
        let last_chunk = vec![99; last_chunk];

        for _i in 0..chunk_nbr {
            file.write(&chunk).expect("write a chunk");
        }

        file.write(&last_chunk).expect("write last chunk");

        log_debug!("{}", file);

        file.save().expect("save");

        log_debug!("{}", file);

        let file_size = file.size();
        log_debug!("file size: {}", file_size);

        log_debug!("data size: {}", data_size);

        assert_eq!(data_size, file.meta.total_size() as usize);

        assert_eq!(file.depth().unwrap(), 4);

        assert_eq!(store.len(), Ok(7));
    }

    /// Test open
    #[test]
    pub fn test_open() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_max_value_size(), //store_valid_value_size(0),//
            "image/jpeg".to_string(),
            vec![],
            Arc::clone(&store),
        );

        log_debug!("{}", file);

        for chunk in img_buffer.chunks(1000) {
            file.write(chunk).expect("write a chunk");
        }

        file.save().expect("save");

        let file2 = RandomAccessFile::open(file.id().unwrap(), file.key.unwrap(), store)
            .expect("reopen file");

        // this works only because store_max_value_size() is bigger than the actual size of the JPEG file. so it fits in one block.
        let res = file2.read(0, img_buffer.len()).expect("read all");

        log_debug!("{}", file2);

        assert_eq!(res, img_buffer);

        // // asking too much, receiving an error, as now we know the total size of file, and we check it
        // assert_eq!(
        //     file2.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well behind the size of the JPG
        assert_eq!(file2.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file2.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file2.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file2.read(29454, 0), Err(FileError::InvalidArgument));
    }

    /// Test read JPEG file small
    #[test]
    pub fn test_read_small_file() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");
        let len = img_buffer.len();
        let content = ObjectContent::new_file_v0_with_content(img_buffer.clone(), "image/jpeg");

        let max_object_size = store_max_value_size();
        let store = Store::dummy_public_v0();
        let mut obj = Object::new(content, None, max_object_size, &store);

        log_debug!("{}", obj);

        let _ = obj.save_in_test(&store).expect("save");

        let file = File::open(obj.id(), obj.key().unwrap(), store).expect("open");

        let res = file.read(0, len).expect("read all");

        assert_eq!(res, img_buffer);
    }

    /// Test read JPEG file random access
    #[test]
    pub fn test_read_random_access_file() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");
        let len = img_buffer.len();

        let max_object_size = store_max_value_size();
        let store = Store::dummy_public_v0();

        log_debug!("creating empty file");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            max_object_size,
            "image/jpeg".to_string(),
            vec![],
            Arc::clone(&store),
        );

        file.write(&img_buffer).expect("write all");

        log_debug!("{}", file);

        file.save().expect("save");

        log_debug!("{}", file);

        let file = File::open(
            file.id().unwrap(),
            file.key().as_ref().unwrap().clone(),
            store,
        )
        .expect("open");

        // this only works because we chose a big block size (1MB) so the small JPG file fits in one block.
        // if not, we would have to call read repeatedly and append the results into a buffer, in order to get the full file
        let res = file.read(0, len).expect("read all");

        assert_eq!(res, img_buffer);
    }

    /// Test depth 4, but using write in increments, so the memory burden on the system will be minimal
    #[ignore]
    #[test]
    pub fn test_depth_4_big_write_small() {
        let encoding_big_file = std::time::Instant::now();

        let f = std::fs::File::open("[enter path of a big file here]").expect("open of a big file");
        let mut reader = BufReader::new(f);

        let store = Store::dummy_public_v0();

        log_debug!("creating empty file");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "image/jpeg".to_string(),
            vec![],
            store,
        );

        log_debug!("{}", file);

        let mut chunk = [0u8; 1000000];

        loop {
            let size = reader.read(&mut chunk).expect("read a chunk");
            //log_debug!("{}", size);
            file.write(&chunk[0..size]).expect("write a chunk");
            if size != 1000000 {
                break;
            }
        }

        log_debug!("{}", file);

        file.save().expect("save");

        log_debug!("{}", file);

        log_debug!("data size: {}", file.meta.total_size());

        //assert_eq!(data_size, file.meta.total_size() as usize);

        assert_eq!(file.depth().unwrap(), 4);

        log_debug!(
            "encoding_big_file took: {} s",
            encoding_big_file.elapsed().as_secs_f32()
        );
    }

    /// Test a ~2.7GB write in 2MB increments with big blocks (depth 1), so the memory burden on the system will be minimal
    #[ignore]
    #[test]
    pub fn test_depth_4_big_write_big() {
        let encoding_big_file = std::time::Instant::now();

        let f = std::fs::File::open("[enter path of a big file here]").expect("open of a big file");
        let mut reader = BufReader::new(f);

        let store = Store::dummy_public_v0();

        log_debug!("creating empty file");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_max_value_size(),
            "image/jpeg".to_string(),
            vec![],
            store,
        );

        log_debug!("{}", file);

        let mut chunk = [0u8; 2000000];

        loop {
            let size = reader.read(&mut chunk).expect("read a chunk");
            //log_debug!("{}", size);
            file.write(&chunk[0..size]).expect("write a chunk");
            if size != 2000000 {
                break;
            }
        }

        log_debug!("{}", file);

        file.save().expect("save");

        log_debug!("{}", file);

        log_debug!("data size: {}", file.meta.total_size());

        //assert_eq!(data_size, file.meta.total_size() as usize);

        assert_eq!(file.depth().unwrap(), 1);

        log_debug!(
            "encoding_big_file took: {} s",
            encoding_big_file.elapsed().as_secs_f32()
        );
    }
}