hydrate_base/
b3f.rs

1//! Basic Binary Block Format (B3F)
2//!
//! File Format
//! [4] magic number encoded as u32 (0xBB33FF00)
//! [4] file tag (arbitrary 4 bytes for user)
//! [4] version (arbitrary meaning for user, encoded as u32)
//! [4] block count n (encoded as u32)
//! [8] bytes indicating 0 (0x00), i.e. a u64 zero that precedes the first ending offset
//! [8*n] ending offset of each block (encoded as u64, relative to the start of the data)
//! [x] pad to 16 byte offset
//! [n*len(n)] data (format/encoding/semantics would be implied by file tag). Each block begins at
//!     a 16 byte offset and is followed by
//! [x] pad to 16 byte offset
13//!
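//! Endianness is undefined: every integer is stored in the native byte order of the writer. Use
//! the magic number to detect if endianness is different between writer/reader (a reader of the
//! opposite endianness sees it byte-swapped, as 0x00FF33BB)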
16//!
17//! This format can be encoded into a block, making this structure hierarchical. In this
18//! case, omit the magic number, and use the file tag to optionally indicate the contents
19//! of the block. (So it becomes a "block tag")
20//!
//! If you c-cast the range of memory from byte 16 to byte 16 + (block count + 1) * 8, you have an
//! array of u64 of n+1 length where n is number of blocks. Offset for block n (relative to the
//! start of the data) is given by array[n] rounded up to a multiple of 16. End of block n is
//! given by array[n+1]. Size of block n in bytes is given by array[n+1] minus that rounded offset
24//!
25//! Alignment of blocks to 16 bytes promotes reinterpreting bytes i.e. u8 to u64 or __m128 without
26//! tripping over undefined behavior
27
28use std::convert::TryInto;
29use std::io::{Cursor, SeekFrom};
30use std::ops::Range;
31
/// Size of the fixed header: magic number, file tag, version and block count (4 bytes each).
const HEADER_SIZE_IN_BYTES: usize = 16;
/// Size of one entry in the block offset table; every entry is a `u64`.
const BLOCK_LENGTH_SIZE_IN_BYTES: usize = 8;
/// Alignment of the start of the data section and of every block stored in it.
const BLOCK_ALIGNMENT_IN_BYTES: usize = 16;
35
36/// Used to encode data into B3F format
37pub struct B3FWriter<'a> {
38    file_tag: u32,
39    version: u32,
40    blocks: Vec<&'a [u8]>,
41}
42
43impl<'a> B3FWriter<'a> {
44    pub fn new_from_u8_tag(
45        file_tag: [u8; 4],
46        version: u32,
47    ) -> Self {
48        B3FWriter {
49            file_tag: u32::from_ne_bytes(file_tag),
50            version,
51            blocks: Vec::default(),
52        }
53    }
54
55    pub fn new_from_u32_tag(
56        file_tag: u32,
57        version: u32,
58    ) -> Self {
59        B3FWriter {
60            file_tag,
61            version,
62            blocks: Vec::default(),
63        }
64    }
65
66    pub fn add_block(
67        &mut self,
68        data: &'a [u8],
69    ) {
70        self.blocks.push(data);
71    }
72
73    pub fn write<W: std::io::Write>(
74        &self,
75        mut writer: W,
76    ) {
77        //
78        // 16 byte header
79        //
80        writer.write(&0xBB33FF00u32.to_ne_bytes()).unwrap();
81        writer.write(&self.file_tag.to_ne_bytes()).unwrap();
82        writer.write(&self.version.to_ne_bytes()).unwrap();
83        let block_count = self.blocks.len() as u32;
84        writer.write(&block_count.to_ne_bytes()).unwrap();
85
86        //
87        // A single u64 zero + N u64 block end positions
88        //
89        writer.write(&0u64.to_ne_bytes()).unwrap();
90
91        let mut block_begin = 0;
92        for block in &self.blocks {
93            // Determine where the block ends
94            let block_end = block_begin + block.len();
95
96            // Write the ending of the previous block (or 0 for first block)
97            writer.write(&(block_end as u64).to_ne_bytes()).unwrap();
98
99            // Realign to 16 bytes, this is where the next block begins
100            block_begin = ((block_end + BLOCK_ALIGNMENT_IN_BYTES - 1) / BLOCK_ALIGNMENT_IN_BYTES)
101                * BLOCK_ALIGNMENT_IN_BYTES;
102        }
103
104        //
105        // Pad block 0 to start at a 16 byte offset
106        //
107        let data_offset =
108            HEADER_SIZE_IN_BYTES + ((self.blocks.len() + 1) * BLOCK_LENGTH_SIZE_IN_BYTES);
109        if data_offset % 16 == 8 {
110            writer.write(&0u64.to_ne_bytes()).unwrap();
111        } else {
112            assert!(data_offset % 16 == 0);
113        }
114
115        //
116        // Write the blocks
117        //
118        for block in &self.blocks {
119            writer.write(*block).unwrap();
120            if block.len() % 16 != 0 {
121                let required_padding = 16 - block.len() % 16;
122                for _ in 0..required_padding {
123                    writer.write(&0u8.to_ne_bytes()).unwrap();
124                }
125            }
126        }
127    }
128}
129
130pub struct B3FReader {
131    file_tag: [u8; 4],
132    version: u32,
133    block_count: u32,
134}
135
136impl B3FReader {
137    pub fn file_tag_as_u32(&self) -> u32 {
138        u32::from_ne_bytes(self.file_tag.try_into().unwrap())
139    }
140
141    pub fn file_tag_as_u8(&self) -> &[u8] {
142        &self.file_tag
143    }
144
145    pub fn version(&self) -> u32 {
146        self.version
147    }
148
149    pub fn block_count(&self) -> usize {
150        self.block_count as usize
151    }
152
153    pub fn new<T: std::io::Read + std::io::Seek>(reader: &mut T) -> std::io::Result<Option<Self>> {
154        reader.seek(SeekFrom::Start(0))?;
155        let mut bytes = [0u8; 4];
156        reader.read(&mut bytes)?;
157        let magic_number = u32::from_ne_bytes(bytes);
158        if magic_number != 0xBB33FF00 {
159            return Ok(None);
160        }
161
162        reader.read(&mut bytes)?;
163        let file_tag = bytes;
164
165        reader.read(&mut bytes)?;
166        let version = u32::from_ne_bytes(bytes);
167
168        reader.read(&mut bytes)?;
169        let block_count = u32::from_ne_bytes(bytes);
170
171        Ok(Some(B3FReader {
172            file_tag,
173            version,
174            block_count,
175        }))
176    }
177
178    pub fn get_block_location<T: std::io::Read + std::io::Seek>(
179        &self,
180        reader: &mut T,
181        index: usize,
182    ) -> std::io::Result<Range<usize>> {
183        // assumed by some implementation details here
184        debug_assert_eq!(BLOCK_LENGTH_SIZE_IN_BYTES, 8);
185        let begin_size_offset = HEADER_SIZE_IN_BYTES + (index * BLOCK_LENGTH_SIZE_IN_BYTES);
186        reader.seek(SeekFrom::Start(begin_size_offset as u64))?;
187
188        let mut bytes = [0u8; 8];
189        reader.read(&mut bytes)?;
190        let mut begin = u64::from_ne_bytes(bytes.try_into().unwrap()) as usize;
191        reader.read(&mut bytes)?;
192        let end = u64::from_ne_bytes(bytes.try_into().unwrap()) as usize;
193
194        // Begin position needs to be rounded up to 16-byte offset
195        begin = ((begin + BLOCK_ALIGNMENT_IN_BYTES - 1) / BLOCK_ALIGNMENT_IN_BYTES)
196            * BLOCK_ALIGNMENT_IN_BYTES;
197
198        let mut data_offset =
199            HEADER_SIZE_IN_BYTES + ((self.block_count as usize + 1) * BLOCK_LENGTH_SIZE_IN_BYTES);
200        data_offset = ((data_offset + BLOCK_ALIGNMENT_IN_BYTES - 1) / BLOCK_ALIGNMENT_IN_BYTES)
201            * BLOCK_ALIGNMENT_IN_BYTES;
202
203        Ok((data_offset + begin)..(data_offset + end))
204    }
205
206    pub fn read_block<T: std::io::Read + std::io::Seek>(
207        &self,
208        reader: &mut T,
209        index: usize,
210    ) -> std::io::Result<Vec<u8>> {
211        let block_location = self.get_block_location(reader, index)?;
212        reader.seek(SeekFrom::Start(block_location.start as u64))?;
213        let mut bytes = vec![0u8; block_location.end - block_location.start];
214        reader.read(bytes.as_mut_slice())?;
215        Ok(bytes)
216    }
217
218    pub fn read_block_from_slice<'a>(
219        &self,
220        data: &'a [u8],
221        index: usize,
222    ) -> std::io::Result<&'a [u8]> {
223        let mut cursor = Cursor::new(data);
224        //let buf_reader = BufReader::new(data);
225        let block_location = self.get_block_location(&mut cursor, index)?;
226        Ok(&data[block_location])
227    }
228}