1use std::fmt::Debug;
2use std::io::{Cursor, Read, Write};
3use std::mem::size_of;
4
5use bytes::Bytes;
6use serde::Serialize;
7
8use super::shard_file::MDB_FILE_INFO_ENTRY_SIZE;
9use super::xorb_structs::{XorbChunkSequenceEntry, XorbChunkSequenceHeader};
10use crate::merklehash::data_hash::hex;
11use crate::merklehash::{DataHash, MerkleHash};
12use crate::serialization_utils::*;
13
/// Flag word with no optional sections enabled; the starting value that
/// `FileDataSequenceHeader::new` ORs the optional bits into.
pub const MDB_DEFAULT_FILE_FLAG: u32 = 0;

/// Bit 31 of the file flags: set when verification entries accompany the file.
pub const MDB_FILE_FLAG_WITH_VERIFICATION: u32 = 1 << 31;
/// Mask used to test the verification bit (the same single bit as the flag).
pub const MDB_FILE_FLAG_VERIFICATION_MASK: u32 = MDB_FILE_FLAG_WITH_VERIFICATION;

/// Bit 30 of the file flags: set when a metadata extension accompanies the file.
pub const MDB_FILE_FLAG_WITH_METADATA_EXT: u32 = 1 << 30;
/// Mask used to test the metadata-extension bit (the same single bit as the flag).
pub const MDB_FILE_FLAG_METADATA_EXT_MASK: u32 = MDB_FILE_FLAG_WITH_METADATA_EXT;
19
/// Alias for [`DataHash`]; it is the type of the `sha256` field of `FileMetadataExt`.
/// NOTE(review): presumably holds a SHA-256 digest, judging by the name — confirm.
pub type Sha256 = DataHash;
21
22#[derive(Clone, Debug, Default, PartialEq, Serialize)]
27#[repr(C)]
30pub struct FileDataSequenceHeader {
31 #[serde(with = "hex::serde")]
32 pub file_hash: MerkleHash,
33 pub file_flags: u32,
34 pub num_entries: u32,
35 pub _unused: u64,
36}
37
38impl FileDataSequenceHeader {
39 pub fn new<I: TryInto<u32>>(
40 file_hash: MerkleHash,
41 num_entries: I,
42 contains_verification: bool,
43 contains_metadata_ext: bool,
44 ) -> Self
45 where
46 <I as TryInto<u32>>::Error: Debug,
47 {
48 let verification_flag = if contains_verification {
49 MDB_FILE_FLAG_WITH_VERIFICATION
50 } else {
51 Default::default()
52 };
53 let metadata_ext_flag = if contains_metadata_ext {
54 MDB_FILE_FLAG_WITH_METADATA_EXT
55 } else {
56 Default::default()
57 };
58 let file_flags = MDB_DEFAULT_FILE_FLAG | verification_flag | metadata_ext_flag;
59 Self {
60 file_hash,
61 file_flags,
62 num_entries: num_entries.try_into().unwrap(),
63 #[cfg(test)]
64 _unused: 126846135456846514u64,
65 #[cfg(not(test))]
66 _unused: 0,
67 }
68 }
69
70 pub fn bookend() -> Self {
71 Self {
72 file_hash: [!0u64; 4].into(),
74 ..Default::default()
75 }
76 }
77
78 pub fn is_bookend(&self) -> bool {
79 self.file_hash == [!0u64; 4].into()
80 }
81
82 pub fn serialize<W: Write>(&self, writer: &mut W) -> Result<usize, std::io::Error> {
83 let mut buf = [0u8; size_of::<Self>()];
84 {
85 let mut writer_cur = Cursor::new(&mut buf[..]);
86 let writer = &mut writer_cur;
87
88 write_hash(writer, &self.file_hash)?;
89 write_u32(writer, self.file_flags)?;
90 write_u32(writer, self.num_entries)?;
91 write_u64(writer, self._unused)?;
92 }
93
94 writer.write_all(&buf[..])?;
95
96 Ok(size_of::<FileDataSequenceHeader>())
97 }
98
99 pub fn deserialize<R: Read>(reader: &mut R) -> Result<Self, std::io::Error> {
100 let mut v = [0u8; size_of::<Self>()];
101 reader.read_exact(&mut v[..])?;
102 let mut reader_curs = Cursor::new(&v);
103 let reader = &mut reader_curs;
104
105 Ok(Self {
106 file_hash: read_hash(reader)?,
107 file_flags: read_u32(reader)?,
108 num_entries: read_u32(reader)?,
109 _unused: read_u64(reader)?,
110 })
111 }
112
113 pub fn contains_metadata_ext(&self) -> bool {
114 (self.file_flags & MDB_FILE_FLAG_METADATA_EXT_MASK) != 0
115 }
116
117 pub fn contains_verification(&self) -> bool {
118 (self.file_flags & MDB_FILE_FLAG_VERIFICATION_MASK) != 0
119 }
120
121 pub fn num_info_entry_following(&self) -> u32 {
124 let num_metadata_ext = if self.contains_metadata_ext() { 1 } else { 0 };
125 if self.contains_verification() {
126 self.num_entries * 2 + num_metadata_ext
127 } else {
128 self.num_entries + num_metadata_ext
129 }
130 }
131
132 #[inline]
136 pub fn verify_same_file(header1: &Self, header2: &Self) {
137 debug_assert_eq!(header1.file_hash, header2.file_hash, "hashes don't match");
138 debug_assert_eq!(header1.num_entries, header2.num_entries, "num entries for same hash don't match");
139 }
140
141 pub fn compare_flag_superset(header_a: &Self, header_b: &Self) -> SupersetResult {
149 let flags0 = header_a.file_flags;
150 let flags1 = header_b.file_flags;
151 if flags0 == flags1 {
152 SupersetResult::Equal
153 } else if flags0 & flags1 == flags1 {
154 SupersetResult::SuperA
155 } else if flags1 & flags0 == flags0 {
156 SupersetResult::SuperB
157 } else {
158 SupersetResult::Neither
159 }
160 }
161}
162
/// Outcome of comparing the flag bits of two headers, `A` and `B`, as
/// produced by `FileDataSequenceHeader::compare_flag_superset`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SupersetResult {
    /// `A` has every bit of `B` and at least one more.
    SuperA,
    /// `B` has every bit of `A` and at least one more.
    SuperB,
    /// Each side has a bit the other lacks.
    Neither,
    /// Both flag words are identical.
    Equal,
}
171
172#[derive(Clone, Debug, Default, PartialEq, Serialize)]
173#[repr(C)] pub struct FileDataSequenceEntry {
175 #[serde(with = "hex::serde", rename = "cas_hash")]
177 pub xorb_hash: MerkleHash,
178 #[serde(rename = "cas_flags")]
179 pub xorb_flags: u32,
180 pub unpacked_segment_bytes: u32,
181 pub chunk_index_start: u32,
182 pub chunk_index_end: u32,
183}
184
185impl FileDataSequenceEntry {
186 pub fn new<I1: TryInto<u32>>(
187 xorb_hash: MerkleHash,
188 unpacked_segment_bytes: I1,
189 chunk_index_start: I1,
190 chunk_index_end: I1,
191 ) -> Self
192 where
193 <I1 as TryInto<u32>>::Error: Debug,
194 {
195 Self {
196 xorb_hash,
197 xorb_flags: MDB_DEFAULT_FILE_FLAG,
198 unpacked_segment_bytes: unpacked_segment_bytes.try_into().unwrap(),
199 chunk_index_start: chunk_index_start.try_into().unwrap(),
200 chunk_index_end: chunk_index_end.try_into().unwrap(),
201 }
202 }
203
204 pub fn from_xorb_entries<I1: TryInto<u32>>(
205 metadata: &XorbChunkSequenceHeader,
206 chunks: &[XorbChunkSequenceEntry],
207 chunk_index_start: I1,
208 chunk_index_end: I1,
209 ) -> Self
210 where
211 <I1 as TryInto<u32>>::Error: Debug,
212 {
213 if chunks.is_empty() {
214 return Self::default();
215 }
216
217 Self {
218 xorb_hash: metadata.xorb_hash,
219 xorb_flags: metadata.xorb_flags,
220 unpacked_segment_bytes: chunks.iter().map(|sb| sb.unpacked_segment_bytes).sum(),
221 chunk_index_start: chunk_index_start.try_into().unwrap(),
222 chunk_index_end: chunk_index_end.try_into().unwrap(),
223 }
224 }
225
226 pub fn serialize<W: Write>(&self, writer: &mut W) -> Result<usize, std::io::Error> {
227 let mut buf = [0u8; size_of::<Self>()];
228 {
229 let mut writer_cur = Cursor::new(&mut buf[..]);
230 let writer = &mut writer_cur;
231
232 write_hash(writer, &self.xorb_hash)?;
233 write_u32(writer, self.xorb_flags)?;
234 write_u32(writer, self.unpacked_segment_bytes)?;
235 write_u32(writer, self.chunk_index_start)?;
236 write_u32(writer, self.chunk_index_end)?;
237 }
238
239 writer.write_all(&buf[..])?;
240
241 Ok(size_of::<FileDataSequenceEntry>())
242 }
243
244 pub fn deserialize<R: Read>(reader: &mut R) -> Result<Self, std::io::Error> {
245 let mut v = [0u8; size_of::<FileDataSequenceEntry>()];
246 reader.read_exact(&mut v[..])?;
247
248 let mut reader_curs = Cursor::new(&v);
249 let reader = &mut reader_curs;
250
251 Ok(Self {
252 xorb_hash: read_hash(reader)?,
253 xorb_flags: read_u32(reader)?,
254 unpacked_segment_bytes: read_u32(reader)?,
255 chunk_index_start: read_u32(reader)?,
256 chunk_index_end: read_u32(reader)?,
257 })
258 }
259}
260
261#[derive(Clone, Debug, Default, PartialEq, Serialize)]
262pub struct FileVerificationEntry {
263 #[serde(with = "hex::serde")]
264 pub range_hash: MerkleHash,
265 pub _unused: [u64; 2],
266}
267
268impl FileVerificationEntry {
269 pub fn new(range_hash: MerkleHash) -> Self {
270 Self {
271 range_hash,
272 _unused: Default::default(),
273 }
274 }
275
276 pub fn serialize<W: Write>(&self, writer: &mut W) -> Result<usize, std::io::Error> {
277 let mut buf = [0u8; size_of::<Self>()];
278
279 {
280 let mut writer = Cursor::new(&mut buf[..]);
281 write_hash(&mut writer, &self.range_hash)?;
282 write_u64s(&mut writer, &self._unused)?;
283 }
284
285 writer.write_all(&buf)?;
286
287 Ok(size_of::<Self>())
288 }
289
290 pub fn deserialize<R: Read>(reader: &mut R) -> Result<Self, std::io::Error> {
291 let mut v = [0u8; size_of::<Self>()];
292 reader.read_exact(&mut v[..])?;
293
294 let mut reader_curs = Cursor::new(&v);
295 let reader = &mut reader_curs;
296
297 Ok(Self {
298 range_hash: read_hash(reader)?,
299 _unused: Default::default(),
300 })
301 }
302}
303
304#[derive(Clone, Debug, Default, PartialEq, Serialize)]
306pub struct FileMetadataExt {
307 #[serde(with = "hex::serde")]
308 pub sha256: Sha256,
309 pub _unused: [u64; 2],
310}
311
312impl FileMetadataExt {
313 pub fn new(sha256: Sha256) -> Self {
314 Self {
315 sha256,
316 _unused: Default::default(),
317 }
318 }
319
320 pub fn serialize<W: Write>(&self, writer: &mut W) -> Result<usize, std::io::Error> {
321 let mut buf = [0u8; size_of::<Self>()];
322
323 {
324 let mut writer = Cursor::new(&mut buf[..]);
325 write_hash(&mut writer, &self.sha256)?;
326 write_u64s(&mut writer, &self._unused)?;
327 }
328
329 writer.write_all(&buf)?;
330
331 Ok(size_of::<Self>())
332 }
333
334 pub fn deserialize<R: Read>(reader: &mut R) -> Result<Self, std::io::Error> {
335 let mut v = [0u8; size_of::<Self>()];
336 reader.read_exact(&mut v[..])?;
337
338 let mut reader_curs = Cursor::new(&v);
339 let reader = &mut reader_curs;
340
341 Ok(Self {
342 sha256: read_hash(reader)?,
343 _unused: Default::default(),
344 })
345 }
346}
347
348#[derive(Clone, Debug, Default, PartialEq, Serialize)]
349pub struct MDBFileInfo {
350 pub metadata: FileDataSequenceHeader,
351 pub segments: Vec<FileDataSequenceEntry>,
352 pub verification: Vec<FileVerificationEntry>,
353 pub metadata_ext: Option<FileMetadataExt>,
354}
355
356impl MDBFileInfo {
357 pub fn num_bytes(&self) -> u64 {
358 size_of::<FileDataSequenceHeader>() as u64
359 + self.metadata.num_info_entry_following() as u64 * MDB_FILE_INFO_ENTRY_SIZE as u64
360 }
361
362 pub fn file_size(&self) -> u64 {
364 self.segments.iter().map(|fse| fse.unpacked_segment_bytes as u64).sum()
365 }
366
367 pub fn serialize<W: Write>(&self, writer: &mut W) -> Result<usize, std::io::Error> {
368 if self.contains_verification() {
369 debug_assert!(self.segments.len() == self.verification.len());
370 }
371
372 let mut bytes_written = 0;
373
374 bytes_written += self.metadata.serialize(writer)?;
375
376 for file_segment in self.segments.iter() {
377 bytes_written += file_segment.serialize(writer)?;
378 }
379
380 if self.contains_verification() {
381 for verification in self.verification.iter() {
382 bytes_written += verification.serialize(writer)?;
383 }
384 }
385 if let Some(metadata_ext) = self.metadata_ext.as_ref() {
386 bytes_written += metadata_ext.serialize(writer)?;
387 }
388
389 Ok(bytes_written)
390 }
391
392 pub fn deserialize<R: Read>(reader: &mut R) -> Result<Option<Self>, std::io::Error> {
393 let metadata = FileDataSequenceHeader::deserialize(reader)?;
394
395 if metadata.is_bookend() {
397 return Ok(None);
398 }
399
400 let num_entries = metadata.num_entries as usize;
401
402 let mut segments = Vec::with_capacity(num_entries);
403 for _ in 0..num_entries {
404 segments.push(FileDataSequenceEntry::deserialize(reader)?);
405 }
406
407 let mut verification = Vec::with_capacity(num_entries);
408 if metadata.contains_verification() {
409 for _ in 0..num_entries {
410 verification.push(FileVerificationEntry::deserialize(reader)?);
411 }
412 }
413 let metadata_ext = metadata
414 .contains_metadata_ext()
415 .then(|| FileMetadataExt::deserialize(reader))
416 .transpose()?;
417
418 Ok(Some(Self {
419 metadata,
420 segments,
421 verification,
422 metadata_ext,
423 }))
424 }
425
426 pub fn contains_verification(&self) -> bool {
427 self.metadata.contains_verification()
428 }
429
430 pub fn contains_metadata_ext(&self) -> bool {
431 self.metadata.contains_metadata_ext()
432 }
433
434 pub fn merge_from(&mut self, other: &Self) -> crate::error::Result<()> {
438 FileDataSequenceHeader::verify_same_file(&self.metadata, &other.metadata);
439 if self.contains_verification() != other.contains_verification() && other.contains_verification() {
440 self.metadata.file_flags |= MDB_FILE_FLAG_WITH_VERIFICATION;
442 self.verification.clone_from(&other.verification);
443 }
444 if self.contains_metadata_ext() != other.contains_metadata_ext() && other.contains_metadata_ext() {
445 self.metadata.file_flags |= MDB_FILE_FLAG_WITH_METADATA_EXT;
447 self.metadata_ext.clone_from(&other.metadata_ext);
448 }
449 Ok(())
450 }
451
452 #[cfg(test)]
459 pub fn equal_accepting_no_verification(&self, other: &Self) -> bool {
460 if self.contains_verification() ^ other.contains_verification() {
461 self.metadata.num_entries == other.metadata.num_entries
462 && self.metadata.file_hash == other.metadata.file_hash
463 && self.metadata.contains_metadata_ext() == other.metadata.contains_metadata_ext()
464 && self.metadata_ext == other.metadata_ext
465 && self.segments == other.segments
466 } else {
467 self == other
468 }
469 }
470}
471
472#[derive(Debug, Clone, PartialEq)]
473pub struct MDBFileInfoView {
474 header: FileDataSequenceHeader,
475 data: Bytes, }
477
478impl MDBFileInfoView {
479 pub fn new(data: Bytes) -> std::io::Result<Self> {
482 let header = FileDataSequenceHeader::deserialize(&mut Cursor::new(&data))?;
483 Self::from_data_and_header(header, data)
484 }
485
486 pub fn from_data_and_header(header: FileDataSequenceHeader, data: Bytes) -> std::io::Result<Self> {
487 let n = header.num_entries as usize;
489 let contains_verification = header.contains_verification();
490 let contains_metadata_ext = header.contains_metadata_ext();
491
492 let n_structs = 1 + n + (if contains_verification { n } else { 0 }) + (if contains_metadata_ext { 1 } else { 0 });
495
496 if data.len() < n_structs * MDB_FILE_INFO_ENTRY_SIZE {
497 return Err(std::io::Error::new(
498 std::io::ErrorKind::UnexpectedEof,
499 "Provided slice too small to read MDBFileInfoView",
500 ));
501 }
502
503 Ok(Self { header, data })
504 }
505
506 pub fn header(&self) -> &FileDataSequenceHeader {
507 &self.header
508 }
509
510 #[inline]
511 pub fn num_entries(&self) -> usize {
512 self.header.num_entries as usize
513 }
514
515 #[inline]
516 pub fn file_hash(&self) -> MerkleHash {
517 self.header.file_hash
518 }
519
520 #[inline]
521 pub fn file_flags(&self) -> u32 {
522 self.header.file_flags
523 }
524
525 #[inline]
526 pub fn contains_metadata_ext(&self) -> bool {
527 self.header.contains_metadata_ext()
528 }
529
530 #[inline]
531 pub fn contains_verification(&self) -> bool {
532 self.header.contains_verification()
533 }
534
535 #[inline]
536 pub fn entry(&self, idx: usize) -> FileDataSequenceEntry {
537 debug_assert!(idx < self.num_entries());
538
539 FileDataSequenceEntry::deserialize(&mut Cursor::new(&self.data[((1 + idx) * MDB_FILE_INFO_ENTRY_SIZE)..]))
540 .expect("bookkeeping error on data bounds for entry")
541 }
542
543 #[inline]
544 pub fn verification(&self, idx: usize) -> FileVerificationEntry {
545 debug_assert!(self.contains_verification());
546 debug_assert!(idx < self.num_entries());
547
548 FileVerificationEntry::deserialize(&mut Cursor::new(
549 &self.data[((1 + self.num_entries() + idx) * MDB_FILE_INFO_ENTRY_SIZE)..],
550 ))
551 .expect("bookkeeping error on data bounds for verification")
552 }
553
554 pub fn byte_size(&self, with_verification: bool) -> usize {
555 let n = self.num_entries();
556 let n_structs = 1 + n + (if with_verification && self.contains_verification() { n } else { 0 }) + (if self.contains_metadata_ext() { 1 } else { 0 });
559
560 n_structs * MDB_FILE_INFO_ENTRY_SIZE
561 }
562
563 #[inline]
564 pub fn serialize<W: Write>(&self, writer: &mut W, with_verification: bool) -> std::io::Result<usize> {
565 let have_verification = self.contains_verification();
566 if with_verification && !have_verification {
567 return Err(std::io::Error::other("missing requested verification info"));
568 }
569 let n_bytes = if !with_verification && have_verification {
570 let header =
572 FileDataSequenceHeader::new(self.file_hash(), self.num_entries(), false, self.contains_metadata_ext());
573 header.serialize(writer)?;
574 let mut num_written = MDB_FILE_INFO_ENTRY_SIZE;
575
576 writer.write_all(
577 &self.data[(MDB_FILE_INFO_ENTRY_SIZE)..((1 + self.num_entries()) * MDB_FILE_INFO_ENTRY_SIZE)],
578 )?;
579 num_written += self.num_entries() * MDB_FILE_INFO_ENTRY_SIZE;
580 if self.contains_metadata_ext() {
581 writer.write_all(&self.data[(self.data.len() - MDB_FILE_INFO_ENTRY_SIZE)..])?;
582 num_written += MDB_FILE_INFO_ENTRY_SIZE;
583 }
584 num_written
586 } else {
587 writer.write_all(&self.data)?;
589 self.data.len()
590 };
591
592 Ok(n_bytes)
593 }
594
595 #[inline]
596 pub fn bytes(&self) -> Bytes {
597 self.data.clone()
598 }
599
600 #[inline]
602 pub fn metadata_ext(&self) -> Option<FileMetadataExt> {
603 if !self.contains_metadata_ext() {
604 return None;
605 }
606 let offset = self.data.len() - MDB_FILE_INFO_ENTRY_SIZE;
608 FileMetadataExt::deserialize(&mut Cursor::new(&self.data[offset..])).ok()
609 }
610}
611
612impl From<&MDBFileInfoView> for MDBFileInfo {
613 fn from(view: &MDBFileInfoView) -> Self {
614 let segments: Vec<FileDataSequenceEntry> = (0..view.num_entries()).map(|i| view.entry(i)).collect();
615 let verification = if view.contains_verification() {
616 (0..view.num_entries()).map(|i| view.verification(i)).collect()
617 } else {
618 vec![]
619 };
620 MDBFileInfo {
621 metadata: FileDataSequenceHeader::new(
622 view.file_hash(),
623 segments.len(),
624 view.contains_verification(),
625 view.contains_metadata_ext(),
626 ),
627 segments,
628 verification,
629 metadata_ext: view.metadata_ext(),
630 }
631 }
632}
633
#[cfg(test)]
mod tests {
    use itertools::{Itertools, iproduct};
    use rand::SeedableRng;
    use rand::prelude::StdRng;

    use super::*;
    use crate::metadata_shard::shard_format::test_routines::{gen_random_file_info, simple_hash};

    /// Round-trips a generated file info that carries both verification
    /// entries and a metadata extension, checking the reported sizes on the way.
    #[test]
    fn test_serde_has_metadata_ext() {
        let mut rng = StdRng::seed_from_u64(3);
        let original = gen_random_file_info(&mut rng, &2, true, true);

        assert!(original.metadata_ext.is_some());
        assert_eq!(original.metadata.num_info_entry_following(), original.metadata.num_entries * 2 + 1);

        let expected_len = original.num_bytes();
        let mut encoded = Vec::new();
        let written = original.serialize(&mut encoded).unwrap();
        assert_eq!(written as u64, expected_len);
        assert_eq!(encoded.len(), written);

        let decoded = MDBFileInfo::deserialize(&mut &encoded[..]).unwrap().unwrap();
        assert_eq!(original, decoded);
    }

    /// Compares every pair of flag combinations and checks the full 4x4
    /// result matrix.
    #[test]
    fn test_compare_flags() {
        let hash = simple_hash(42);
        let choices = vec![false, true];

        // Header order: (verification, metadata_ext) =
        // (false, false), (false, true), (true, false), (true, true).
        let headers = iproduct!(choices.clone(), choices)
            .map(|(with_verification, with_metadata_ext)| {
                FileDataSequenceHeader::new(hash, 5, with_verification, with_metadata_ext)
            })
            .collect_vec();

        // Row = header A, column = header B, both in the order above.
        #[rustfmt::skip]
        let expected = vec![
            SupersetResult::Equal,  SupersetResult::SuperB,  SupersetResult::SuperB,  SupersetResult::SuperB,
            SupersetResult::SuperA, SupersetResult::Equal,   SupersetResult::Neither, SupersetResult::SuperB,
            SupersetResult::SuperA, SupersetResult::Neither, SupersetResult::Equal,   SupersetResult::SuperB,
            SupersetResult::SuperA, SupersetResult::SuperA,  SupersetResult::SuperA,  SupersetResult::Equal,
        ];

        let mut results = Vec::with_capacity(headers.len() * headers.len());
        for header_a in &headers {
            for header_b in &headers {
                results.push(FileDataSequenceHeader::compare_flag_superset(header_a, header_b));
            }
        }

        assert_eq!(expected, results);
    }
}