
// Copyright (c) 2022-2024 Niko Bonnieure, Par le Peuple, NextGraph.org developers
// All rights reserved.
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE2 or http://www.apache.org/licenses/LICENSE-2.0>
// or the MIT license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.

//! SmallFile and RandomAccessFile objects

use core::fmt;
use std::cmp::min;
use std::collections::HashMap;
use std::sync::Arc;

use chacha20::cipher::{KeyIvInit, StreamCipher};
use chacha20::ChaCha20;
use zeroize::Zeroize;

use crate::block_storage::*;
use crate::errors::*;
#[allow(unused_imports)]
use crate::log::*;
use crate::object::*;
use crate::store::Store;
use crate::types::*;

/// File errors
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum FileError {
    /// Missing blocks
    MissingBlocks(Vec<BlockId>),
    /// Missing root key
    MissingRootKey,
    /// Invalid BlockId encountered in the tree
    InvalidBlockId,
    /// Too many or too few children of a block
    InvalidChildren,
    /// Number of keys does not match number of children of a block
    InvalidKeys,
    /// Invalid CommitHeader object content
    InvalidHeader,
    /// Error deserializing content of a block
    BlockDeserializeError,
    /// Error deserializing content of the RandomAccessFileMeta
    MetaDeserializeError,
    /// Files are immutable: this one was already saved once and cannot be modified.
    /// Create a new File for your new data (and delete the old one if needed).
    AlreadySaved,
    /// File is too big
    TooBig,
    /// Block not found in storage
    NotFound,
    /// Other storage error
    StorageError,
    /// Attempt to read past the end of the file
    EndOfFile,
    /// Invalid argument (e.g. a zero-sized read)
    InvalidArgument,
    /// The object is not a File
    NotAFile,
}
58
59impl From<StorageError> for FileError {
60    fn from(e: StorageError) -> Self {
61        match e {
62            StorageError::NotFound => FileError::NotFound,
63            _ => FileError::StorageError,
64        }
65    }
66}
67
68impl From<ObjectParseError> for FileError {
69    fn from(e: ObjectParseError) -> Self {
70        match e {
71            _ => FileError::BlockDeserializeError,
72        }
73    }
74}
75
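/// Read access to the content of a SmallFile or RandomAccessFile.
///
/// A minimal sketch of a full sequential read (the `file` value is
/// hypothetical; `read` may return fewer bytes than requested, so the caller
/// loops and advances `pos` by the length actually returned):
///
/// ```ignore
/// let mut buf: Vec<u8> = Vec::new();
/// let mut pos = 0;
/// loop {
///     match file.read(pos, 4096) {
///         Ok(chunk) => {
///             pos += chunk.len();
///             buf.extend_from_slice(&chunk);
///         }
///         Err(FileError::EndOfFile) => break,
///         Err(e) => return Err(e),
///     }
/// }
/// ```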
pub trait ReadFile {
    fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError>;

    fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError>;
}

/// A File in memory (read access only)
pub struct File<'a> {
    internal: Box<dyn ReadFile + 'a>,
    blocks_ids: Vec<BlockId>,
}

impl<'a> File<'a> {
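    /// Opens a File for reading, whether it is a SmallFile or a RandomAccessFile.
    ///
    /// A minimal usage sketch (the `id`, `key` and `store` values are assumed
    /// to have been obtained elsewhere):
    ///
    /// ```ignore
    /// let file = File::open(id, key, Arc::clone(&store))?;
    /// let first_bytes = file.read(0, 16)?;
    /// ```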
    pub fn open(id: ObjectId, key: SymKey, store: Arc<Store>) -> Result<File<'a>, FileError> {
        let root_block = store.get(&id)?;

        if root_block.children().len() == 2
            && *root_block.content().commit_header_obj() == CommitHeaderObject::RandomAccess
        {
            Ok(File {
                internal: Box::new(RandomAccessFile::open(id, key, store)?),
                blocks_ids: vec![],
            })
        } else {
            let obj = Object::load(id, Some(key), &store)?;
            match obj.content_v0()? {
                ObjectContentV0::SmallFile(small_file) => Ok(File {
                    internal: Box::new(small_file),
                    blocks_ids: obj.block_ids(),
                }),
                _ => Err(FileError::NotAFile),
            }
        }
    }
}

impl<'a> ReadFile for File<'a> {
    fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError> {
        self.internal.read(pos, size)
    }
    fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError> {
        if !self.blocks_ids.is_empty() {
            Ok(self.blocks_ids.to_vec())
        } else {
            self.internal.get_all_blocks_ids()
        }
    }
}

impl ReadFile for SmallFile {
    fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError> {
        match self {
            Self::V0(v0) => v0.read(pos, size),
        }
    }
    fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError> {
        unimplemented!();
    }
}

impl ReadFile for SmallFileV0 {
    fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError> {
        if size == 0 {
            return Err(FileError::InvalidArgument);
        }
        if pos + size > self.content.len() {
            return Err(FileError::EndOfFile);
        }
        Ok(self.content[pos..pos + size].to_vec())
    }
    fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError> {
        unimplemented!();
    }
}

/// A RandomAccessFile in memory. This is not used to serialize data.
pub struct RandomAccessFile {
    //storage: Arc<&'a dyn BlockStorage>,
    store: Arc<Store>,
    /// Accurate once saved or opened.
    meta: RandomAccessFileMeta,

    //meta_object_id: Option<BlockId>,
    //content_block_id: Option<BlockId>,
    /// Keeps the deduplicated blocks' IDs; used for async writes.
    block_contents: HashMap<BlockKey, BlockId>,

    /// Blocks of the Object (nodes of the tree). Only used when writing asynchronously, before saving.
    blocks: Vec<(BlockId, BlockKey)>,

    /// When an id is present, the File is opened in Read mode, and cannot be saved.
    id: Option<ObjectId>,
    key: Option<ObjectKey>,

    content_block: Option<(BlockId, BlockKey)>,

    // used for writes
    conv_key: Option<[u8; 32]>,
    remainder: Vec<u8>,
    size: usize,
}

impl ReadFile for RandomAccessFile {
    fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError> {
        if self.id.is_none() {
            unimplemented!();
        }
        let mut res = Vec::with_capacity(4);
        res.extend(self.blocks.iter().map(|(id, _)| *id));

        recurse_tree(
            &self.store,
            self.content_block.as_ref().unwrap().clone(),
            &mut res,
            self.meta.depth(),
        )?;

        fn recurse_tree(
            store: &Store,
            current_block_id_key: (Digest, SymKey),
            res: &mut Vec<Digest>,
            level: u8,
        ) -> Result<(), FileError> {
            res.push(current_block_id_key.0);
            if level > 0 {
                let tree_block = store.get(&current_block_id_key.0)?;
                let (children, content) = tree_block.read(&current_block_id_key.1)?;
                if children.is_empty() || !content.is_empty() {
                    return Err(FileError::BlockDeserializeError);
                }

                for child in children {
                    recurse_tree(store, child, res, level - 1)?;
                }
            }
            Ok(())
        }
        Ok(res)
    }

    /// Reads at most one block from the file. The returned vector should be
    /// tested for size: it might be smaller than what you asked for.
    /// The `pos`ition can be anywhere in the file.
    //TODO: parallelize decryption on multi threads (cores)
    fn read(&self, pos: usize, mut size: usize) -> Result<Vec<u8>, FileError> {
        if size == 0 {
            return Err(FileError::InvalidArgument);
        }
        if self.id.is_some() {
            let total = self.meta.total_size() as usize;
            if pos > total {
                return Err(FileError::EndOfFile);
            }
            size = min(total - pos, size);
            let mut current_block_id_key = self.content_block.as_ref().unwrap().clone();

            let depth = self.meta.depth();
            let arity = self.meta.arity();

            let mut level_pos = pos;
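            // Walk down the tree. At each level, one child spans
            // arity^(depth - level - 1) * chunk_size bytes of content, so the
            // child to follow is at level_pos / factor, and the offset within
            // it is level_pos % factor.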
            for level in 0..depth {
                let tree_block = self.store.get(&current_block_id_key.0)?;
                let (children, content) = tree_block.read(&current_block_id_key.1)?;
                if children.is_empty() || !content.is_empty() {
                    return Err(FileError::BlockDeserializeError);
                }
                let factor = (arity as usize).pow(depth as u32 - level as u32 - 1)
                    * self.meta.chunk_size() as usize;
                let level_index = level_pos / factor;
                if level_index >= children.len() {
                    return Err(FileError::EndOfFile);
                }
                current_block_id_key = (children[level_index]).clone();
                level_pos = level_pos % factor;
            }

            let content_block = self.store.get(&current_block_id_key.0)?;
            //log_debug!("CONTENT BLOCK SIZE {}", content_block.size());

            let (children, content) = content_block.read(&current_block_id_key.1)?;

            if children.is_empty() && !content.is_empty() {
                //log_debug!("CONTENT SIZE {}", content.len());

                if level_pos >= content.len() {
                    return Err(FileError::EndOfFile);
                }
                let end = min(content.len(), level_pos + size);
                return Ok(content[level_pos..end].to_vec());
            } else {
                return Err(FileError::BlockDeserializeError);
            }
        } else {
            // Not saved yet: use self.blocks as a flat array, plus the remainder.
            let factor = self.meta.chunk_size() as usize;
            let index = pos / factor;
            let level_pos = pos % factor;
            let remainder_pos = self.blocks.len() * factor;
            if pos >= remainder_pos {
                let pos_in_remainder = pos - remainder_pos;
                if !self.remainder.is_empty() && pos_in_remainder < self.remainder.len() {
                    let end = min(self.remainder.len(), pos_in_remainder + size);
                    return Ok(self.remainder[pos_in_remainder..end].to_vec());
                } else {
                    return Err(FileError::EndOfFile);
                }
            }
            //log_debug!("{} {} {} {}", index, self.blocks.len(), factor, level_pos);
            if index >= self.blocks.len() {
                return Err(FileError::EndOfFile);
            }
            let block = &self.blocks[index];
            let content_block = self.store.get(&block.0)?;
            let (children, content) = content_block.read(&block.1)?;
            if children.is_empty() && !content.is_empty() {
                //log_debug!("CONTENT SIZE {}", content.len());

                if level_pos >= content.len() {
                    return Err(FileError::EndOfFile);
                }
                let end = min(content.len(), level_pos + size);
                return Ok(content[level_pos..end].to_vec());
            } else {
                return Err(FileError::BlockDeserializeError);
            }
        }
    }
}

impl RandomAccessFile {
    pub fn meta(&self) -> &RandomAccessFileMeta {
        &self.meta
    }

    pub fn id(&self) -> &Option<ObjectId> {
        &self.id
    }

    pub fn key(&self) -> &Option<ObjectKey> {
        &self.key
    }

    fn make_block(
        mut content: Vec<u8>,
        conv_key: &[u8; blake3::OUT_LEN],
        children: Vec<ObjectId>,
        already_existing: &mut HashMap<BlockKey, BlockId>,
        store: &Store,
    ) -> Result<(BlockId, BlockKey), StorageError> {
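        // Convergent encryption: the block key is the keyed BLAKE3 hash of the
        // plaintext, so identical content encrypted under the same convergence
        // key yields the same key and BlockId, enabling deduplication.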
        let key_hash = blake3::keyed_hash(conv_key, &content);

        let key_slice = key_hash.as_bytes();
        let key = SymKey::ChaCha20Key(key_slice.clone());
        if let Some(id) = already_existing.get(&key) {
            return Ok((*id, key));
        }
        let nonce = [0u8; 12];
        let mut cipher = ChaCha20::new(key_slice.into(), &nonce.into());
        cipher.apply_keystream(content.as_mut_slice());

        let mut block = Block::new_random_access(children, content, None);
        //log_debug!(">>> make_block random access: {}", block.id());
        //log_debug!("!! children: ({}) {:?}", children.len(), children);

        let id = block.get_and_save_id();
        already_existing.insert(key.clone(), id);
        //log_debug!("putting *** {}", id);
        store.put(&block)?;
        Ok((id, key))
    }

    fn make_parent_block(
        conv_key: &[u8; blake3::OUT_LEN],
        children: Vec<(BlockId, BlockKey)>,
        already_existing: &mut HashMap<BlockKey, BlockId>,
        store: &Store,
    ) -> Result<(BlockId, BlockKey), StorageError> {
        let mut ids: Vec<BlockId> = Vec::with_capacity(children.len());
        let mut keys: Vec<BlockKey> = Vec::with_capacity(children.len());
        children.iter().for_each(|child| {
            ids.push(child.0);
            keys.push(child.1.clone());
        });
        let content = ChunkContentV0::InternalNode(keys);
        let content_ser = serde_bare::to_vec(&content).unwrap();

        Self::make_block(content_ser, conv_key, ids, already_existing, store)
    }

    /// Builds the tree from the leaves, recursing level by level until a single root remains
    fn make_tree(
        already_existing: &mut HashMap<BlockKey, BlockId>,
        leaves: &[(BlockId, BlockKey)],
        conv_key: &ChaCha20Key,
        arity: u16,
        store: &Store,
    ) -> Result<(BlockId, BlockKey), StorageError> {
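        // One parent per chunk of `arity` children, then recurse on the parents
        // (e.g. arity 16 and 200 leaves: 200 -> 13 parents -> 1 root).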
        let mut parents: Vec<(BlockId, BlockKey)> = vec![];
        for nodes in leaves.chunks(arity as usize) {
            //log_debug!("making parent");
            parents.push(Self::make_parent_block(
                conv_key,
                nodes.to_vec(),
                already_existing,
                store,
            )?);
        }
        //log_debug!("level with {} parents", parents.len());

        if parents.len() > 1 {
            return Self::make_tree(already_existing, parents.as_slice(), conv_key, arity, store);
        }
        Ok(parents[0].clone())
    }

    /// Returns the content_block id/key pair, and the root_block id/key pair.
    fn save_(
        already_existing: &mut HashMap<BlockKey, BlockId>,
        blocks: &[(BlockId, BlockKey)],
        meta: &mut RandomAccessFileMeta,
        conv_key: &ChaCha20Key,
        store: &Store,
    ) -> Result<((BlockId, BlockKey), (BlockId, BlockKey)), FileError> {
        let leaf_blocks_nbr = blocks.len();
        let arity = meta.arity();

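        // Find the smallest depth such that arity^depth leaf blocks are enough
        // (e.g. with arity 16 and 200 leaves: 16^1 < 200 <= 16^2, so depth = 2).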
        let mut depth: u8 = u8::MAX;
        for i in 0..u8::MAX {
            if leaf_blocks_nbr <= (arity as usize).pow(i.into()) {
                depth = i;
                break;
            }
        }
        if depth == u8::MAX {
            return Err(FileError::TooBig);
        }
        meta.set_depth(depth);
        //log_debug!("depth={} leaves={}", depth, leaf_blocks_nbr);

        let content_block = if depth == 0 {
            assert!(blocks.len() == 1);
            blocks[0].clone()
        } else {
            // we create the tree
            Self::make_tree(already_existing, &blocks, &conv_key, arity, store)?
        };

        let meta_object = Object::new_with_convergence_key(
            ObjectContent::V0(ObjectContentV0::RandomAccessFileMeta(meta.clone())),
            None,
            store_valid_value_size(meta.chunk_size() as usize),
            conv_key,
        );
        //log_debug!("saving meta object");
        _ = meta_object.save(store)?;

        // Create the root block, with the meta_object as its first child and the
        // content_block as its second child. It is added to storage in make_parent_block.
        //log_debug!("saving root block");
        let root_block = Self::make_parent_block(
            conv_key,
            vec![
                (meta_object.id(), meta_object.key().unwrap()),
                content_block.clone(),
            ],
            already_existing,
            store,
        )?;
        Ok((content_block, root_block))
    }

    /// Creates a new file from a content that is fully known at the time of creation.
    ///
    /// If you want to stream the content progressively into the new file, use new_empty(), write() and save() instead.
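    ///
    /// A minimal usage sketch (the `data` and `store` values are hypothetical,
    /// and the block size and MIME type are illustrative):
    ///
    /// ```ignore
    /// let file = RandomAccessFile::new_from_slice(
    ///     &data,
    ///     store_max_value_size(),
    ///     "application/octet-stream".to_string(),
    ///     vec![],
    ///     Arc::clone(&store),
    /// )?;
    /// let id = file.id().unwrap();
    /// ```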
    pub fn new_from_slice(
        content: &[u8],
        block_size: usize,
        content_type: String,
        metadata: Vec<u8>,
        store: Arc<Store>,
    ) -> Result<RandomAccessFile, FileError> {
        //let max_block_size = store_max_value_size();
        let valid_block_size = store_valid_value_size(block_size) - BLOCK_EXTRA;

        let arity = (valid_block_size / CHILD_SIZE) as u16;

        let total_size = content.len() as u64;

        let mut conv_key = Object::convergence_key(&store);

        let mut blocks: Vec<(BlockId, BlockKey)> = vec![];

        let mut already_existing: HashMap<BlockKey, BlockId> = HashMap::new();

        //log_debug!("making the leaves");
        for chunk in content.chunks(valid_block_size) {
            let data_chunk = ChunkContentV0::DataChunk(chunk.to_vec());
            let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
            blocks.push(Self::make_block(
                content_ser,
                &conv_key,
                vec![],
                &mut already_existing,
                &store,
            )?);
        }
        assert_eq!(
            (total_size as usize + valid_block_size - 1) / valid_block_size,
            blocks.len()
        );

        let mut meta = RandomAccessFileMeta::V0(RandomAccessFileMetaV0 {
            content_type,
            metadata,
            chunk_size: valid_block_size as u32,
            total_size,
            arity,
            depth: 0,
        });

        let (content_block, root_block) =
            Self::save_(&mut already_existing, &blocks, &mut meta, &conv_key, &store)?;

        conv_key.zeroize();

        Ok(Self {
            store,
            meta,
            block_contents: HashMap::new(), // not used in this case
            blocks: vec![],                 // not used in this case
            id: Some(root_block.0.clone()),
            key: Some(root_block.1.clone()),
            content_block: Some(content_block),
            conv_key: None,    // not used in this case
            remainder: vec![], // not used in this case
            size: 0,           // not used in this case
        })
    }

    pub fn new_empty(
        block_size: usize,
        content_type: String,
        metadata: Vec<u8>,
        store: Arc<Store>,
    ) -> Self {
        let valid_block_size = store_valid_value_size(block_size) - BLOCK_EXTRA;

        let arity = (valid_block_size / CHILD_SIZE) as u16;

        let meta = RandomAccessFileMeta::V0(RandomAccessFileMetaV0 {
            content_type,
            metadata,
            chunk_size: valid_block_size as u32,
            arity,
            total_size: 0, // will be filled in later, during save
            depth: 0,      // will be filled in later, during save
        });

        Self {
            store: Arc::clone(&store),
            meta,
            block_contents: HashMap::new(),
            blocks: vec![],
            id: None,
            key: None,
            content_block: None,
            conv_key: Some(Object::convergence_key(&store)),
            remainder: vec![],
            size: 0,
        }
    }

    /// Appends data at the end of a file created with new_empty() and not yet saved.
    /// You can call it as many times as needed. Don't forget to eventually call save().
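    ///
    /// A minimal streaming sketch (the `block_size`, `data` and `store` values
    /// are hypothetical):
    ///
    /// ```ignore
    /// let mut file =
    ///     RandomAccessFile::new_empty(block_size, "text/plain".to_string(), vec![], store);
    /// for chunk in data.chunks(1_000_000) {
    ///     file.write(chunk)?;
    /// }
    /// let id = file.save()?;
    /// ```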
    pub fn write(&mut self, data: &[u8]) -> Result<(), FileError> {
        if self.id.is_some() {
            return Err(FileError::AlreadySaved);
        }
        let remainder = self.remainder.len();
        let chunk_size = self.meta.chunk_size() as usize;
        let mut pos: usize = 0;
        let conv_key = self.conv_key.unwrap();
        // TODO: provide an option to search storage for already existing blocks, when resuming a previously aborted write
        let mut already_existing: HashMap<BlockKey, BlockId> = HashMap::new();

        if remainder > 0 {
            if data.len() >= chunk_size - remainder {
                let mut new_block = Vec::with_capacity(chunk_size);
                new_block.append(&mut self.remainder);
                pos = chunk_size - remainder;
                self.size += chunk_size;
                //log_debug!("size += chunk_size {} {}", self.size, chunk_size);
                new_block.extend(data[0..pos].iter());
                assert_eq!(new_block.len(), chunk_size);
                let data_chunk = ChunkContentV0::DataChunk(new_block);
                let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
                self.blocks.push(Self::make_block(
                    content_ser,
                    &conv_key,
                    vec![],
                    &mut already_existing,
                    &self.store,
                )?);
            } else {
                // not enough data to create a new block
                self.remainder.extend(data.iter());
                return Ok(());
            }
        } else if data.len() < chunk_size {
            self.remainder = Vec::from(data);
            return Ok(());
        }

        for chunk in data[pos..].chunks(chunk_size) {
            if chunk.len() == chunk_size {
                self.size += chunk_size;
                //log_debug!("size += chunk_size {} {}", self.size, chunk_size);
                let data_chunk = ChunkContentV0::DataChunk(chunk.to_vec());
                let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
                self.blocks.push(Self::make_block(
                    content_ser,
                    &conv_key,
                    vec![],
                    &mut already_existing,
                    &self.store,
                )?);
            } else {
                self.remainder = Vec::from(chunk);
                return Ok(());
            }
        }
        Ok(())
    }

    pub fn save(&mut self) -> Result<ObjectId, FileError> {
        if self.id.is_some() {
            return Err(FileError::AlreadySaved);
        }
        // save the remainder, if any.
        if !self.remainder.is_empty() {
            self.size += self.remainder.len();
            //log_debug!("size += remainder {} {}", self.size, self.remainder.len());
            let mut remainder = Vec::with_capacity(self.remainder.len());
            remainder.append(&mut self.remainder);
            let data_chunk = ChunkContentV0::DataChunk(remainder);
            let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
            self.blocks.push(Self::make_block(
                content_ser,
                &self.conv_key.unwrap(),
                vec![],
                &mut HashMap::new(),
                &self.store,
            )?);
        }

        self.meta.set_total_size(self.size as u64);

        let mut already_existing: HashMap<BlockKey, BlockId> = HashMap::new();
        let (content_block, root_block) = Self::save_(
            &mut already_existing,
            &self.blocks,
            &mut self.meta,
            self.conv_key.as_ref().unwrap(),
            &self.store,
        )?;

        self.conv_key.as_mut().unwrap().zeroize();
        self.conv_key = None;

        self.id = Some(root_block.0);
        self.key = Some(root_block.1.clone());
        self.content_block = Some(content_block);

        self.blocks = vec![];
        self.blocks.shrink_to_fit();

        Ok(root_block.0)
    }

    pub fn reference(&self) -> Option<ObjectRef> {
        if self.key.is_some() && self.id.is_some() {
            Some(ObjectRef::from_id_key(
                self.id.unwrap(),
                self.key.as_ref().unwrap().clone(),
            ))
        } else {
            None
        }
    }

    /// Opens a file for reading.
    pub fn open(
        id: ObjectId,
        key: SymKey,
        store: Arc<Store>,
    ) -> Result<RandomAccessFile, FileError> {
        // load the root block
        let root_block = store.get(&id)?;

        if root_block.children().len() != 2
            || *root_block.content().commit_header_obj() != CommitHeaderObject::RandomAccess
        {
            return Err(FileError::BlockDeserializeError);
        }

        let (root_sub_blocks, _) = root_block.read(&key)?;

        // load the meta object (the first child of the root block)
        let meta_object = Object::load(
            root_sub_blocks[0].0,
            Some(root_sub_blocks[0].1.clone()),
            &store,
        )?;

        let meta = match meta_object.content_v0()? {
            ObjectContentV0::RandomAccessFileMeta(meta) => meta,
            _ => return Err(FileError::InvalidChildren),
        };

        Ok(RandomAccessFile {
            store,
            meta,
            block_contents: HashMap::new(), // not used in this case
            blocks: vec![(id, SymKey::nil()), (root_sub_blocks[0].0, SymKey::nil())], // not used in this case
            id: Some(id),
            key: Some(key),
            content_block: Some(root_sub_blocks[1].clone()),
            conv_key: None,
            remainder: vec![],
            size: 0,
        })
    }

    pub fn blocks(&self) -> impl Iterator<Item = Block> + '_ {
        self.blocks
            .iter()
            .map(|key| self.store.get(&key.0).unwrap())
    }

    /// Size once encoded, before deduplication. Only available before save().
    pub fn size(&self) -> usize {
        let mut total = 0;
        self.blocks().for_each(|b| total += b.size());
        total
    }

    /// Real size on disk
    pub fn dedup_size(&self) -> usize {
        let mut total = 0;
        self.block_contents
            .values()
            .for_each(|b| total += self.store.get(b).unwrap().size());
        total
    }

    pub fn depth(&self) -> Result<u8, NgError> {
        Ok(self.meta.depth())

        // unimplemented!();
        // if self.key().is_none() {
        //     return Err(ObjectParseError::MissingRootKey);
        // }
        // let parents = vec![(self.id(), self.key().unwrap())];
        // Self::collect_leaves(
        //     &self.blocks,
        //     &parents,
        //     self.blocks.len() - 1,
        //     &mut None,
        //     &mut None,
        //     &self.block_contents,
        // )
    }
}

impl fmt::Display for RandomAccessFile {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(
            f,
            "====== File ID {}",
            self.id
                .map_or("NOT SAVED".to_string(), |i| format!("{}", i))
        )?;
        writeln!(
            f,
            "== Key:    {}",
            self.key
                .as_ref()
                .map_or("None".to_string(), |k| format!("{}", k))
        )?;
        writeln!(f, "== depth:        {}", self.meta.depth())?;
        writeln!(f, "== arity:        {}", self.meta.arity())?;
        writeln!(f, "== chunk_size:   {}", self.meta.chunk_size())?;
        writeln!(f, "== total_size:   {}", self.meta.total_size())?;
        writeln!(f, "== content_type: {}", self.meta.content_type())?;
        writeln!(f, "== metadata len: {}", self.meta.metadata().len())?;
        if self.id.is_none() {
            writeln!(f, "== blocks to save: {}", self.blocks.len())?;
        }
        Ok(())
    }
}

#[cfg(test)]
mod test {

    use time::Instant;

    use crate::file::*;
    use std::io::BufReader;
    use std::io::Read;

    /// Checks that a content that fits in one block creates a file of depth 0
    #[test]
    pub fn test_depth_0() {
        let block_size = store_max_value_size();
        //store_valid_value_size(0)

        ////// 1 MB of data!
        let data_size = block_size - BLOCK_EXTRA;

        let store = Store::dummy_public_v0();
        log_debug!("creating 1MB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating random access file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            block_size,
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let id = file.id.as_ref().unwrap().clone();

        let file_size = file.size();
        log_debug!("file size to save : {}", file_size);

        log_debug!("data size: {}", data_size);

        let read_content = file.read(0, data_size).expect("reading all");
        assert_eq!(read_content, content);

        let read_content2 = file.read(0, data_size + 1);
        assert_eq!(read_content2.unwrap().len(), 1048564);

        let read_content = file.read(data_size - 9, 9).expect("reading end");
        assert_eq!(read_content, vec![99, 99, 99, 99, 99, 99, 99, 99, 99]);

        let read_content = file.read(data_size - 9, 10);
        assert_eq!(read_content, Ok(vec![99, 99, 99, 99, 99, 99, 99, 99, 99]));

        // log_debug!(
        //     "overhead: {} - {}%",
        //     file_size - data_size,
        //     ((file_size - data_size) * 100) as f32 / data_size as f32
        // );

        // let dedup_size = file.dedup_size();
        // log_debug!(
        //     "dedup compression: {} - {}%",
        //     data_size - dedup_size,
        //     ((data_size - dedup_size) * 100) as f32 / data_size as f32
        // );

        // log_debug!("number of blocks : {}", file.blocks.len());
        // assert_eq!(
        //     file.blocks.len(),
        //     MAX_ARITY_LEAVES * (MAX_ARITY_LEAVES + 1) * MAX_ARITY_LEAVES + MAX_ARITY_LEAVES + 1
        // );
        assert_eq!(file.depth(), Ok(0));
        assert_eq!(store.len(), Ok(3));

        let file = RandomAccessFile::open(id, file.key.unwrap(), store).expect("re open");

        log_debug!("{}", file);

        let read_content = file.read(0, data_size).expect("reading all after re open");
        assert_eq!(read_content, content);
    }

    /// Checks that a content that exactly fills the leaves of a depth-1 tree creates a depth of 1
    #[ignore]
    #[test]
    pub fn test_depth_1() {
        const MAX_ARITY_LEAVES: usize = 15887;
        const MAX_DATA_PAYLOAD_SIZE: usize = 1048564;

        ////// 16 GB of data!
        let data_size = MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE;

        let store = Store::dummy_public_v0();
        log_debug!("creating 16GB of data");

        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating random access file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_max_value_size(),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let _id = file.id.as_ref().unwrap().clone();

        log_debug!("data size: {}", data_size);

        assert_eq!(file.depth(), Ok(1));

        assert_eq!(store.len(), Ok(4));
    }

    /// Checks that a content one byte larger than what the first level of the tree can hold creates a depth of 2
    #[ignore]
    #[test]
    pub fn test_depth_2() {
        const MAX_ARITY_LEAVES: usize = 15887;
        const MAX_DATA_PAYLOAD_SIZE: usize = 1048564;

        ////// 16 GB of data!
        let data_size = MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE + 1;

        let store = Store::dummy_public_v0();
        log_debug!("creating 16GB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_max_value_size(),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let file_size = file.size();
        log_debug!("file size: {}", file_size);

        log_debug!("data size: {}", data_size);

        assert_eq!(file.depth().unwrap(), 2);

        assert_eq!(store.len(), Ok(7));
    }

    /// Checks that a content that exactly fills a three-level tree creates a depth of 3
    #[test]
    pub fn test_depth_3() {
        const MAX_ARITY_LEAVES: usize = 61;
        const MAX_DATA_PAYLOAD_SIZE: usize = 4084;

        ////// 900 MB of data!
        let data_size =
            MAX_ARITY_LEAVES * MAX_ARITY_LEAVES * MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE;

        let store = Store::dummy_public_v0();
        log_debug!("creating 900MB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_valid_value_size(0),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let file_size = file.size();
        log_debug!("file size: {}", file_size);

        let read_content = file.read(0, data_size).expect("reading all");
        assert_eq!(read_content.len(), MAX_DATA_PAYLOAD_SIZE);

        let read_content = file.read(9000, 10000).expect("reading 10k");
        assert_eq!(read_content, vec![99; 3252]);

        // log_debug!("data size: {}", data_size);
        // log_debug!(
        //     "overhead: {} - {}%",
        //     file_size - data_size,
        //     ((file_size - data_size) * 100) as f32 / data_size as f32
        // );

        // let dedup_size = file.dedup_size();
        // log_debug!(
        //     "dedup compression: {} - {}%",
        //     data_size - dedup_size,
        //     ((data_size - dedup_size) * 100) as f32 / data_size as f32
        // );

        // log_debug!("number of blocks : {}", file.blocks.len());
        // assert_eq!(
        //     file.blocks.len(),
        //     MAX_ARITY_LEAVES * (MAX_ARITY_LEAVES + 1) * MAX_ARITY_LEAVES + MAX_ARITY_LEAVES + 1
        // );
        assert_eq!(file.depth().unwrap(), 3);

        assert_eq!(store.len(), Ok(6));
    }

    /// Checks that a content that exactly fills a four-level tree creates a depth of 4
    #[ignore]
    #[test]
    pub fn test_depth_4() {
        const MAX_ARITY_LEAVES: usize = 61;
        const MAX_DATA_PAYLOAD_SIZE: usize = 4084;

        ////// 52GB of data!
        let data_size = MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_DATA_PAYLOAD_SIZE;

        let store = Store::dummy_public_v0();
        log_debug!("creating 52GB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_valid_value_size(0),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let file_size = file.size();
        log_debug!("file size: {}", file_size);

        log_debug!("data size: {}", data_size);

        assert_eq!(file.depth().unwrap(), 4);

        assert_eq!(store.len(), Ok(7));
    }

    /// Test async write to a file all at once
    #[test]
    pub fn test_write_all_at_once() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_max_value_size(), //store_valid_value_size(0),//
            "image/jpeg".to_string(),
            vec![],
            store,
        );

        log_debug!("{}", file);

        file.write(&img_buffer).expect("write all at once");

        // !!! All those tests work only because store_max_value_size() is bigger
        // than the actual size of the JPEG file, so it fits in one block.

        assert_eq!(
            file.read(0, img_buffer.len()).expect("read before save"),
            img_buffer
        );

        // asking too much, receiving just enough
        assert_eq!(
            file.read(0, img_buffer.len() + 1)
                .expect("read before save"),
            img_buffer
        );

        // // reading too far, well beyond the size of the JPG
        // assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));

        file.save().expect("save");

        let res = file.read(0, img_buffer.len()).expect("read all");
        assert_eq!(res, img_buffer);

        // // asking too much, receiving an error, as now we know the total size of the file, and we check it
        // assert_eq!(
        //     file.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well beyond the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
    }

    /// Test async write to a file by increments
    #[test]
    pub fn test_write_by_increments() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_max_value_size(), //store_valid_value_size(0),//
            "image/jpeg".to_string(),
            vec![],
            store,
        );

        log_debug!("{}", file);

        for chunk in img_buffer.chunks(1000) {
            file.write(chunk).expect("write a chunk");
        }

        assert_eq!(
            file.read(0, img_buffer.len()).expect("read before save"),
            img_buffer
        );

        // asking too much, receiving just enough
        assert_eq!(
            file.read(0, img_buffer.len() + 1)
                .expect("read before save"),
            img_buffer
        );

        // reading too far, well beyond the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);

        // reading one byte after the end of the file size.
        assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));

        file.save().expect("save");

        // This works only because store_max_value_size() is bigger than the
        // actual size of the JPEG file, so it fits in one block.
        let res = file.read(0, img_buffer.len()).expect("read all");

        assert_eq!(res, img_buffer);

        // // asking too much, receiving an error, as now we know the total size of the file, and we check it
        // assert_eq!(
        //     file.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well beyond the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
    }

    /// Test async write to a file by increments, with small blocks
    #[test]
    pub fn test_write_by_increments_small_blocks() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "image/jpeg".to_string(),
            vec![],
            store,
        );

        log_debug!("{}", file);

        let first_block_content = img_buffer[0..4084].to_vec();

        for chunk in img_buffer.chunks(1000) {
            file.write(chunk).expect("write a chunk");
        }

        log_debug!("{}", file);

        assert_eq!(
            file.read(0, img_buffer.len()).expect("read before save"),
            first_block_content
        );

        // asking too much, receiving just the first block
        assert_eq!(
            file.read(0, img_buffer.len() + 1)
                .expect("read before save"),
            first_block_content
        );

        // reading too far, well beyond the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));

        file.save().expect("save");

        log_debug!("{}", file);

        assert_eq!(img_buffer.len(), file.meta.total_size() as usize);

        let res = file.read(0, img_buffer.len()).expect("read all");
        assert_eq!(res, first_block_content);

        // // asking too much, not receiving an error, as we know the total size of the file, and return what we can
        // assert_eq!(
        //     file.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well beyond the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
    }

    /// Test async write to a file all at once, with small blocks
    #[test]
    pub fn test_write_all_at_once_small_blocks() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let first_block_content = img_buffer[0..4084].to_vec();

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "image/jpeg".to_string(),
            vec![],
            store,
        );

        log_debug!("{}", file);

        file.write(&img_buffer).expect("write all at once");

        assert_eq!(
            file.read(0, img_buffer.len()).expect("read before save"),
            first_block_content
        );

        // asking too much, receiving just the first block
        assert_eq!(
            file.read(0, img_buffer.len() + 1)
                .expect("read before save"),
            first_block_content
        );

        // reading too far, well beyond the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));

        file.save().expect("save");

        let res = file.read(0, img_buffer.len()).expect("read all");
        assert_eq!(res, first_block_content);

        let res = file.read(10, img_buffer.len() - 10).expect("read all");
        assert_eq!(res, first_block_content[10..].to_vec());

        // // asking too much, receiving an error, as now we know the total size of the file, and we check it
        // assert_eq!(
        //     file.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well beyond the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
    }

    /// Test depth 4 with 52GB of data, but writing in small increments, so the memory burden on the system stays minimal
    #[ignore]
    #[test]
    pub fn test_depth_4_write_small() {
        const MAX_ARITY_LEAVES: usize = 61;
        const MAX_DATA_PAYLOAD_SIZE: usize = 4084;

        ////// 52GB of data!
        let data_size = MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_DATA_PAYLOAD_SIZE;

        // chunks of 5 MB
        let chunk_nbr = data_size / 5000000;
        let last_chunk = data_size % 5000000;

        let store = Store::dummy_public_v0();

        log_debug!("creating empty file");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "image/jpeg".to_string(),
            vec![],
            Arc::clone(&store),
        );

        log_debug!("{}", file);

        let chunk = vec![99; 5000000];
        let last_chunk = vec![99; last_chunk];

        for _i in 0..chunk_nbr {
            file.write(&chunk).expect("write a chunk");
        }

        file.write(&last_chunk).expect("write last chunk");

        log_debug!("{}", file);

        file.save().expect("save");

        log_debug!("{}", file);

        let file_size = file.size();
        log_debug!("file size: {}", file_size);

        log_debug!("data size: {}", data_size);

        assert_eq!(data_size, file.meta.total_size() as usize);

        assert_eq!(file.depth().unwrap(), 4);

        assert_eq!(store.len(), Ok(7));
    }

    /// Test open
    #[test]
    pub fn test_open() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_max_value_size(), //store_valid_value_size(0),//
            "image/jpeg".to_string(),
            vec![],
            Arc::clone(&store),
        );

        log_debug!("{}", file);

        for chunk in img_buffer.chunks(1000) {
            file.write(chunk).expect("write a chunk");
        }

        file.save().expect("save");

        let file2 = RandomAccessFile::open(file.id().unwrap(), file.key.unwrap(), store)
            .expect("reopen file");

        // This works only because store_max_value_size() is bigger than the
        // actual size of the JPEG file, so it fits in one block.
        let res = file2.read(0, img_buffer.len()).expect("read all");

        log_debug!("{}", file2);

        assert_eq!(res, img_buffer);

        // // asking too much, receiving an error, as now we know the total size of the file, and we check it
        // assert_eq!(
        //     file2.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well beyond the size of the JPG
        assert_eq!(file2.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file2.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file2.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file2.read(29454, 0), Err(FileError::InvalidArgument));
    }

    /// Test read of a small JPEG file
    #[test]
    pub fn test_read_small_file() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");
        let len = img_buffer.len();
        let content = ObjectContent::new_file_v0_with_content(img_buffer.clone(), "image/jpeg");

        let max_object_size = store_max_value_size();
        let store = Store::dummy_public_v0();
        let mut obj = Object::new(content, None, max_object_size, &store);

        log_debug!("{}", obj);

        let _ = obj.save_in_test(&store).expect("save");

        let file = File::open(obj.id(), obj.key().unwrap(), store).expect("open");

        let res = file.read(0, len).expect("read all");

        assert_eq!(res, img_buffer);
    }

    /// Test read of a JPEG file with random access
    #[test]
    pub fn test_read_random_access_file() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");
        let len = img_buffer.len();

        let max_object_size = store_max_value_size();
        let store = Store::dummy_public_v0();

        log_debug!("creating empty file");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            max_object_size,
            "image/jpeg".to_string(),
            vec![],
            Arc::clone(&store),
        );

        file.write(&img_buffer).expect("write all");

        log_debug!("{}", file);

        file.save().expect("save");

        log_debug!("{}", file);

        let file = File::open(
            file.id().unwrap(),
            file.key().as_ref().unwrap().clone(),
            store,
        )
        .expect("open");

        // This only works because we chose a big block size (1MB), so the small
        // JPG file fits in one block. Otherwise we would have to call read()
        // repeatedly and append the results into a buffer to get the full file.
        let res = file.read(0, len).expect("read all");

        assert_eq!(res, img_buffer);
    }

    /// Test depth 4, but writing in increments, so the memory burden on the system stays minimal
    #[ignore]
    #[test]
    pub fn test_depth_4_big_write_small() {
        let encoding_big_file = Instant::now();

        let f = std::fs::File::open("[enter path of a big file here]").expect("open of a big file");
        let mut reader = BufReader::new(f);

        let store = Store::dummy_public_v0();

        log_debug!("creating empty file");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "image/jpeg".to_string(),
            vec![],
            store,
        );

        log_debug!("{}", file);

        let mut chunk = [0u8; 1000000];

        loop {
            let size = reader.read(&mut chunk).expect("read a chunk");
            //log_debug!("{}", size);
            file.write(&chunk[0..size]).expect("write a chunk");
            if size != 1000000 {
                break;
            }
        }

        log_debug!("{}", file);

        file.save().expect("save");

        log_debug!("{}", file);

        log_debug!("data size: {}", file.meta.total_size());

        //assert_eq!(data_size, file.meta.total_size() as usize);

        assert_eq!(file.depth().unwrap(), 4);

        log_debug!(
            "encoding_big_file took: {} s",
            encoding_big_file.elapsed().as_seconds_f32()
        );
    }

    /// Test depth 1 with 2.7GB of data and big blocks, but writing in increments, so the memory burden on the system stays minimal
    #[ignore]
    #[test]
    pub fn test_depth_4_big_write_big() {
        let encoding_big_file = Instant::now();

        let f = std::fs::File::open("[enter path of a big file here]").expect("open of a big file");
        let mut reader = BufReader::new(f);

        let store = Store::dummy_public_v0();

        log_debug!("creating empty file");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_max_value_size(),
            "image/jpeg".to_string(),
            vec![],
            store,
        );

        log_debug!("{}", file);

        let mut chunk = [0u8; 2000000];

        loop {
            let size = reader.read(&mut chunk).expect("read a chunk");
            //log_debug!("{}", size);
            file.write(&chunk[0..size]).expect("write a chunk");
            if size != 2000000 {
                break;
            }
        }

        log_debug!("{}", file);

        file.save().expect("save");

        log_debug!("{}", file);

        log_debug!("data size: {}", file.meta.total_size());

        //assert_eq!(data_size, file.meta.total_size() as usize);

        assert_eq!(file.depth().unwrap(), 1);

        log_debug!(
            "encoding_big_file took: {} s",
            encoding_big_file.elapsed().as_seconds_f32()
        );
    }
}