1use thiserror::{self, Error};
2
3use std::{
4 collections::{BTreeMap, HashMap},
5 fmt,
6 fs::File,
7 hash::Hash,
8 io::{self, Read, Seek, SeekFrom},
9 ops::{Deref, Range},
10 path::Path,
11 sync::Arc,
12};
13
14use deku::{DekuContainerRead, reader::Reader};
15use memmap2::Mmap;
16
17pub use crate::model::*;
18
19pub type BOMResult<T> = Result<T, BOMEror>;
20
/// Cheaply cloneable handle to an immutable byte buffer.
///
/// The storage sits behind an [`Arc`], so cloning a `ByteSource` clones the
/// reference-counted pointer rather than the bytes themselves.
#[derive(Clone)]
pub struct ByteSource {
    // Shared backing storage: either heap-owned bytes or a memory map.
    inner: Arc<ByteSourceInner>,
}
25
/// Backing storage variants for [`ByteSource`].
enum ByteSourceInner {
    /// Bytes owned on the heap (built from a `Vec<u8>`, a boxed slice or a reader).
    Owned(Box<[u8]>),
    /// Bytes exposed through a `memmap2` memory map.
    Mmap(Mmap),
}
30
31impl ByteSource {
32 pub fn from_vec(bytes: Vec<u8>) -> Self {
33 Self::from_boxed_slice(bytes.into_boxed_slice())
34 }
35
36 pub fn from_boxed_slice(bytes: Box<[u8]>) -> Self {
37 Self {
38 inner: Arc::new(ByteSourceInner::Owned(bytes)),
39 }
40 }
41
42 pub fn from_mmap(mmap: Mmap) -> Self {
43 Self {
44 inner: Arc::new(ByteSourceInner::Mmap(mmap)),
45 }
46 }
47
48 pub fn from_reader<R>(mut reader: R) -> io::Result<Self>
49 where
50 R: Read,
51 {
52 let mut bytes = Vec::new();
53 reader.read_to_end(&mut bytes)?;
54 Ok(Self::from_vec(bytes))
55 }
56
57 pub fn as_slice(&self) -> &[u8] {
58 match self.inner.as_ref() {
59 ByteSourceInner::Owned(bytes) => bytes,
60 ByteSourceInner::Mmap(mmap) => mmap,
61 }
62 }
63
64 pub fn len(&self) -> usize {
65 self.as_slice().len()
66 }
67
68 pub fn is_empty(&self) -> bool {
69 self.as_slice().is_empty()
70 }
71
72 pub fn slice(&self, range: Range<usize>) -> BOMResult<ByteSlice> {
73 if range.start > range.end || range.end > self.len() {
74 return Err(BOMEror::InvalidByteRange {
75 offset: range.start,
76 len: range.end.saturating_sub(range.start),
77 source_len: self.len(),
78 });
79 }
80
81 Ok(ByteSlice {
82 source: self.clone(),
83 range,
84 })
85 }
86}
87
88impl fmt::Debug for ByteSource {
89 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
90 f.debug_struct("ByteSource")
91 .field("len", &self.len())
92 .finish_non_exhaustive()
93 }
94}
95
/// A view over a sub-range of a [`ByteSource`].
///
/// Cloning copies the range and the shared source handle, not the bytes.
#[derive(Clone)]
pub struct ByteSlice {
    // Source the view points into.
    source: ByteSource,
    // Absolute byte range inside `source` covered by this view.
    range: Range<usize>,
}
101
102impl ByteSlice {
103 pub fn from_vec(bytes: Vec<u8>) -> Self {
104 Self::from_boxed_slice(bytes.into_boxed_slice())
105 }
106
107 pub fn from_boxed_slice(bytes: Box<[u8]>) -> Self {
108 let source = ByteSource::from_boxed_slice(bytes);
109 source
110 .slice(0..source.len())
111 .expect("full byte source range should be valid")
112 }
113
114 pub fn as_slice(&self) -> &[u8] {
115 &self.source.as_slice()[self.range.clone()]
116 }
117
118 pub fn len(&self) -> usize {
119 self.range.end - self.range.start
120 }
121
122 pub fn is_empty(&self) -> bool {
123 self.len() == 0
124 }
125
126 pub fn to_vec(&self) -> Vec<u8> {
127 self.as_slice().to_vec()
128 }
129
130 pub fn absolute_range(&self) -> Range<usize> {
131 self.range.clone()
132 }
133
134 pub fn slice(&self, range: Range<usize>) -> BOMResult<Self> {
135 if range.start > range.end || range.end > self.len() {
136 return Err(BOMEror::InvalidByteRange {
137 offset: self.range.start.saturating_add(range.start),
138 len: range.end.saturating_sub(range.start),
139 source_len: self.source.len(),
140 });
141 }
142
143 let start = self.range.start + range.start;
144 let end = self.range.start + range.end;
145 self.source.slice(start..end)
146 }
147}
148
149impl Deref for ByteSlice {
150 type Target = [u8];
151
152 fn deref(&self) -> &Self::Target {
153 self.as_slice()
154 }
155}
156
157impl fmt::Debug for ByteSlice {
158 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
159 f.debug_struct("ByteSlice")
160 .field("range", &self.range)
161 .field("len", &self.len())
162 .finish()
163 }
164}
165
/// A [`ByteSlice`] paired with a cursor, readable through `Read` and `Seek`.
#[derive(Clone, Debug)]
pub struct BOMBlock {
    // Bytes that make up the block.
    bytes: ByteSlice,
    // Cursor offset in bytes, relative to the start of `bytes`.
    position: u64,
}
171
172impl BOMBlock {
173 pub fn new(bytes: ByteSlice) -> Self {
174 Self { bytes, position: 0 }
175 }
176
177 pub fn as_slice(&self) -> &[u8] {
178 self.bytes.as_slice()
179 }
180
181 pub fn byte_slice(&self) -> &ByteSlice {
182 &self.bytes
183 }
184
185 pub fn len(&self) -> usize {
186 self.bytes.len()
187 }
188
189 pub fn is_empty(&self) -> bool {
190 self.bytes.is_empty()
191 }
192
193 pub fn slice_at_current(&mut self, len: usize) -> BOMResult<ByteSlice> {
194 let start = usize::try_from(self.position).map_err(|_| BOMEror::InvalidByteRange {
195 offset: usize::MAX,
196 len,
197 source_len: self.bytes.len(),
198 })?;
199 let end = start.checked_add(len).ok_or(BOMEror::InvalidByteRange {
200 offset: start,
201 len,
202 source_len: self.bytes.len(),
203 })?;
204 let slice = self.bytes.slice(start..end)?;
205 self.position = end as u64;
206 Ok(slice)
207 }
208}
209
210impl Read for BOMBlock {
211 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
212 let pos = usize::try_from(self.position).unwrap_or(usize::MAX);
213 let data = self.bytes.as_slice();
214 if pos >= data.len() {
215 return Ok(0);
216 }
217
218 let amt = buf.len().min(data.len() - pos);
219 buf[..amt].copy_from_slice(&data[pos..pos + amt]);
220 self.position += amt as u64;
221 Ok(amt)
222 }
223}
224
225impl Seek for BOMBlock {
226 fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
227 let len = self.bytes.len() as i128;
228 let current = self.position as i128;
229 let next = match pos {
230 SeekFrom::Start(offset) => offset as i128,
231 SeekFrom::End(offset) => len.checked_add(offset as i128).ok_or_else(|| {
232 io::Error::new(io::ErrorKind::InvalidInput, "seek position overflow")
233 })?,
234 SeekFrom::Current(offset) => current.checked_add(offset as i128).ok_or_else(|| {
235 io::Error::new(io::ErrorKind::InvalidInput, "seek position overflow")
236 })?,
237 };
238
239 if next < 0 {
240 return Err(io::Error::new(
241 io::ErrorKind::InvalidInput,
242 "invalid seek before start of BOM block",
243 ));
244 }
245
246 self.position = next as u64;
247 Ok(self.position)
248 }
249}
250
/// A parsed BOM: the raw bytes together with the decoded store header.
pub struct BOM {
    // Complete bytes of the BOM; blocks are handed out as views into it.
    source: ByteSource,
    // Header decoded from the start of `source`; used to look up named
    // variables and block index entries.
    store_header: StoreHeader,
}
255
256impl BOM {
257 pub fn new<R>(mut reader: R) -> BOMResult<Self>
258 where
259 R: Read + Seek,
260 {
261 reader.seek(SeekFrom::Start(0))?;
262 let source = ByteSource::from_reader(reader)?;
263 Self::from_byte_source(source)
264 }
265
266 pub fn from_bytes(bytes: Vec<u8>) -> BOMResult<Self> {
267 Self::from_byte_source(ByteSource::from_vec(bytes))
268 }
269
270 pub fn from_boxed_slice(bytes: Box<[u8]>) -> BOMResult<Self> {
271 Self::from_byte_source(ByteSource::from_boxed_slice(bytes))
272 }
273
274 pub fn from_byte_source(source: ByteSource) -> BOMResult<Self> {
275 let full = source.slice(0..source.len())?;
276 let mut block = BOMBlock::new(full);
277 let (_, store_header) = StoreHeader::from_reader((&mut block, 0))?;
278 Ok(BOM {
279 source,
280 store_header,
281 })
282 }
283
284 pub fn source(&self) -> &ByteSource {
285 &self.source
286 }
287
288 fn block_with_name(&mut self, name: &[u8]) -> BOMResult<BOMBlock> {
289 let var = self
290 .store_header
291 .var_with_name(name)
292 .ok_or(BOMEror::NotFoundVar(
293 String::from_utf8_lossy(name).to_string(),
294 ))?;
295
296 self.block_at(var.index as usize)
297 }
298
299 fn block_at(&mut self, index: usize) -> BOMResult<BOMBlock> {
300 let idx = self.store_header.index_store.indexs.get(index);
301 if let Some(idx) = idx {
302 let offset = idx.offset as usize;
303 let len = idx.len as usize;
304 let end = offset.checked_add(len).ok_or(BOMEror::InvalidIndexRange {
305 index,
306 offset,
307 len,
308 source_len: self.source.len(),
309 })?;
310 if end > self.source.len() {
311 return Err(BOMEror::InvalidIndexRange {
312 index,
313 offset,
314 len,
315 source_len: self.source.len(),
316 });
317 }
318
319 return self.source.slice(offset..end).map(BOMBlock::new);
320 }
321
322 Err(BOMEror::NotFoundIndex(index))
323 }
324
325 fn tree_with_name(&mut self, name: &[u8]) -> BOMResult<Vec<TreePaths>> {
326 let mut block = self.block_with_name(name)?;
327 let (_, header) = TreeHeader::from_reader((&mut block, 0))?;
328
329 let mut tree_paths = vec![];
330 let mut tree_idx = header.index;
331 loop {
332 let path: TreePaths = self.read_block_at(tree_idx as usize)?;
333 if path.is_leaf == 0 {
334 if let Some(idx) = path.indices.first() {
335 tree_idx = idx.val;
336 continue;
337 }
338 break;
339 }
340
341 let next_idx = path.forward;
342 tree_paths.push(path);
343 if next_idx > 0 {
344 tree_idx = next_idx;
345 } else {
346 break;
347 }
348 }
349
350 Ok(tree_paths)
351 }
352
353 pub fn read_block_at<'a, T>(&mut self, index: usize) -> BOMResult<T>
354 where
355 T: deku::DekuReader<'a>,
356 {
357 let block = self.block_at(index)?;
358 let mut reader = Reader::new(block);
359 let data = T::from_reader_with_ctx(&mut reader, ())?;
360
361 Ok(data)
362 }
363
364 pub fn read_block_with_name<'a, T>(&mut self, name: &[u8]) -> BOMResult<T>
365 where
366 T: deku::DekuReader<'a>,
367 {
368 let var = self
369 .store_header
370 .var_with_name(name)
371 .ok_or(BOMEror::NotFoundVar(
372 String::from_utf8_lossy(name).to_string(),
373 ))?;
374 self.read_block_at(var.index as usize)
375 }
376
377 pub fn read_tree_to_btree_map<'a, K, V>(&mut self, name: &[u8]) -> BOMResult<BTreeMap<K, V>>
378 where
379 K: deku::DekuReader<'a> + Ord,
380 V: deku::DekuReader<'a>,
381 {
382 let mut map = BTreeMap::new();
383 self.parse_tree(name, |k, v| {
384 let k = K::from_reader_with_ctx(&mut Reader::new(k), ())?;
385 let v = V::from_reader_with_ctx(&mut Reader::new(v), ())?;
386 map.insert(k, v);
387
388 Ok(())
389 })?;
390
391 Ok(map)
392 }
393
394 pub fn read_tree_to_map<'a, K, V>(&mut self, name: &[u8]) -> BOMResult<HashMap<K, V>>
395 where
396 K: deku::DekuReader<'a> + Ord + Hash,
397 V: deku::DekuReader<'a>,
398 {
399 let mut map = HashMap::new();
400 self.parse_tree(name, |k, v| {
401 let k = K::from_reader_with_ctx(&mut Reader::new(k), ())?;
402 let v = V::from_reader_with_ctx(&mut Reader::new(v), ())?;
403 map.insert(k, v);
404
405 Ok(())
406 })?;
407
408 Ok(map)
409 }
410
411 pub fn parse_tree<F>(&mut self, name: &[u8], mut block: F) -> BOMResult<()>
412 where
413 F: FnMut(BOMBlock, BOMBlock) -> BOMResult<()>,
414 {
415 let paths = self.tree_with_name(name)?;
416 for path in paths {
417 for i in path.indices {
418 let k = self.block_at(i.key as usize)?;
419 let v = self.block_at(i.val as usize)?;
420 block(k, v)?;
421 }
422 }
423
424 Ok(())
425 }
426}
427
428impl BOM {
429 pub fn new_with_file<P>(file_path: P) -> BOMResult<Self>
430 where
431 P: AsRef<Path>,
432 {
433 let file = File::options().read(true).open(file_path)?;
434 let mmap = unsafe { Mmap::map(&file) }?;
435 Self::from_byte_source(ByteSource::from_mmap(mmap))
436 }
437}
438
439#[derive(Error, Debug)]
440pub enum BOMEror {
441 #[error("Read failed {0}")]
442 ReadIO(#[from] io::Error),
443 #[error("Parse struct failed {0}")]
444 ParseStruct(#[from] deku::DekuError),
445 #[error("Cann't not found for index {0}")]
446 NotFoundIndex(usize),
447 #[error("Invalid BOM index range {index}: offset {offset}, len {len}, source len {source_len}")]
448 InvalidIndexRange {
449 index: usize,
450 offset: usize,
451 len: usize,
452 source_len: usize,
453 },
454 #[error("Invalid byte range: offset {offset}, len {len}, source len {source_len}")]
455 InvalidByteRange {
456 offset: usize,
457 len: usize,
458 source_len: usize,
459 },
460 #[error("Cann't not found for name {0}")]
461 NotFoundVar(String),
462 #[error("Cann't not found for tree {0}")]
463 NotFoundTree(String),
464}
465
#[cfg(test)]
mod tests {
    use std::io::Read;

    use super::{BOM, BOMEror};

    /// Appends `value` to `bytes` as four big-endian bytes.
    fn push_be_u32(bytes: &mut Vec<u8>, value: u32) {
        let encoded = u32::to_be_bytes(value);
        bytes.extend_from_slice(&encoded);
    }

    /// Builds a minimal synthetic BOM whose single index entry points at
    /// `offset`/`len`, with `payload` placed at `offset` (zero-padded up to it).
    fn bom_bytes_with_index(offset: u32, len: u32, payload: &[u8]) -> Vec<u8> {
        const INDEX_OFFSET: u32 = 32;
        const VAR_OFFSET: u32 = 44;

        // Words following the 8-byte magic; this fills bytes 8..32.
        // Presumably two leading fields, then index offset/length and
        // variable offset/length - verify against `StoreHeader`.
        let header_words = [1, 1, INDEX_OFFSET, 12, VAR_OFFSET, 4];
        // Written at byte 32: presumably an entry count followed by one
        // (offset, len) entry - verify against the index store model.
        let index_words = [1, offset, len];
        // Written at byte 44: presumably a zero variable count - verify.
        let var_words = [0];

        let mut bytes = b"BOMStore".to_vec();
        let all_words = header_words
            .iter()
            .chain(index_words.iter())
            .chain(var_words.iter())
            .copied();
        for word in all_words {
            push_be_u32(&mut bytes, word);
        }

        let payload_start = offset as usize;
        if bytes.len() < payload_start {
            bytes.resize(payload_start, 0);
        }
        bytes.extend_from_slice(payload);
        bytes
    }

    /// A valid index entry yields a block that views exactly the payload bytes.
    #[test]
    fn block_at_returns_range_view_bytes() {
        let raw = bom_bytes_with_index(64, 5, b"hello");
        let mut bom = BOM::from_bytes(raw).expect("synthetic BOM should parse");

        let mut block = bom.block_at(0).expect("block should exist");
        assert_eq!(block.as_slice(), b"hello");
        assert_eq!(block.byte_slice().absolute_range(), 64..69);

        let mut collected = Vec::new();
        block
            .read_to_end(&mut collected)
            .expect("block should read");
        assert_eq!(collected, b"hello");
    }

    /// An index entry pointing past the end of the data is rejected.
    #[test]
    fn block_at_rejects_out_of_range_index() {
        let raw = bom_bytes_with_index(100, 10, &[]);
        let mut bom = BOM::from_bytes(raw).expect("synthetic BOM should parse");

        let err = bom
            .block_at(0)
            .expect_err("invalid index range should fail");
        let is_expected = matches!(
            err,
            BOMEror::InvalidIndexRange {
                index: 0,
                offset: 100,
                len: 10,
                ..
            }
        );
        assert!(is_expected);
    }
}