Skip to main content

tzap_core/
metadata.rs

1use std::collections::{BTreeSet, HashMap, HashSet};
2
3use sha2::{Digest, Sha256};
4use unicode_normalization::UnicodeNormalization;
5
6use crate::format::FormatError;
7
/// Magic bytes expected at offset 0 of a serialized `IndexRoot`.
const TZIR_MAGIC: [u8; 4] = *b"TZIR";
/// Magic bytes expected at offset 0 of a serialized `IndexShard`.
const TZIS_MAGIC: [u8; 4] = *b"TZIS";
/// Magic bytes expected at offset 0 of a serialized `DirectoryHintTable`.
const TZDH_MAGIC: [u8; 4] = *b"TZDH";

/// Size in bytes of the fixed `IndexRoot` header.
pub const INDEX_ROOT_LEN: usize = 160;
/// Size in bytes of one serialized `ShardEntry` row.
pub const SHARD_ENTRY_LEN: usize = 52;
/// Size in bytes of one serialized `DirectoryHintShardEntry` row.
pub const DIRECTORY_HINT_SHARD_ENTRY_LEN: usize = 56;
/// Size in bytes of one serialized `EnvelopeEntry` row.
pub const ENVELOPE_ENTRY_LEN: usize = 48;
/// Size in bytes of one serialized `FrameEntry` row.
pub const FRAME_ENTRY_LEN: usize = 44;
/// Size in bytes of the fixed `IndexShard` header.
pub const INDEX_SHARD_HEADER_LEN: usize = 64;
/// Size in bytes of one serialized `FileEntry` row.
pub const FILE_ENTRY_LEN: usize = 56;
/// Size in bytes of the fixed `DirectoryHintTable` header.
pub const DIRECTORY_HINT_TABLE_LEN: usize = 72;
/// Size in bytes of one serialized `DirectoryHintEntry` row.
pub const DIRECTORY_HINT_ENTRY_LEN: usize = 40;

// NOTE(review): presumably the mask of `FrameEntry::flags` bits this version
// understands; the code that consults it is outside this view.
const FRAME_KNOWN_FLAGS: u32 = 0x0000_0003;
/// Default for `MetadataLimits::max_hash_collision_shard_scan`.
const DEFAULT_MAX_HASH_COLLISION_SHARD_SCAN: usize = 16;
// NOTE(review): name suggests the maximum data+parity shard total for a
// GF(2^16) Reed-Solomon code; its use is outside this view — confirm.
const REED_SOLOMON_GF16_MAX_TOTAL_SHARDS: u64 = 65_535;
/// SHA-256 digest of the empty byte string; used as the content hash of an
/// empty `IndexRootHeader`.
const SHA256_EMPTY: [u8; 32] = [
    0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
    0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
];
29
/// Resource caps and sizing parameters handed to the metadata parsers.
///
/// The parsers reject input whose declared counts exceed these caps before
/// decoding the corresponding tables.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MetadataLimits {
    // NOTE(review): presumably the archive block size in bytes; it is only
    // forwarded to validators outside this view — confirm.
    pub block_size: u32,
    /// Maximum path length, forwarded to the directory hint path validator.
    pub max_path_length: u32,
    /// Scan cap per direction used when collecting candidate shards for a
    /// path hash (see `IndexRoot::candidate_shards_for_path`).
    pub max_hash_collision_shard_scan: usize,
    /// Upper bound on `IndexRootHeader::shard_count`.
    pub max_shard_count: u32,
    /// Upper bound on `IndexRootHeader::directory_hint_shard_count`.
    pub max_directory_hint_shards: u32,
    /// Upper bound on `IndexShardHeader::file_count`.
    pub max_files_per_index_shard: u32,
    /// Upper bound on `DirectoryHintTableHeader::entry_count`.
    pub max_entries_per_directory_hint_shard: u64,
    // NOTE(review): the six shard-count caps below are consumed by validators
    // outside this view; presumably they bound data/parity block counts of
    // payload, index-shard and index-root objects respectively — confirm.
    pub max_payload_data_shards: u16,
    pub max_payload_parity_shards: u16,
    pub max_index_data_shards: u16,
    pub max_index_parity_shards: u16,
    pub max_index_root_data_shards: u16,
    pub max_index_root_parity_shards: u16,
}
46
47impl Default for MetadataLimits {
48    fn default() -> Self {
49        Self {
50            block_size: 4096,
51            max_path_length: 4096,
52            max_hash_collision_shard_scan: DEFAULT_MAX_HASH_COLLISION_SHARD_SCAN,
53            max_shard_count: 1_000_000,
54            max_directory_hint_shards: 1_000_000,
55            max_files_per_index_shard: 1_000_000,
56            max_entries_per_directory_hint_shard: 1_000_000,
57            max_payload_data_shards: u16::MAX,
58            max_payload_parity_shards: u16::MAX,
59            max_index_data_shards: u16::MAX,
60            max_index_parity_shards: u16::MAX,
61            max_index_root_data_shards: u16::MAX,
62            max_index_root_parity_shards: u16::MAX,
63        }
64    }
65}
66
/// Decoded form of the fixed `INDEX_ROOT_LEN`-byte header of an `IndexRoot`.
///
/// Serialized layout (byte offsets): magic at 0, then the fields below in
/// declaration order starting at offset 4; bytes 128..160 are reserved and
/// must be zero when parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexRootHeader {
    /// Format version (offset 4); `IndexRoot::parse` accepts only `1`.
    pub version: u32,
    /// Number of rows in the shard table (offset 8).
    pub shard_count: u32,
    /// Number of rows in the directory hint shard table (offset 12).
    pub directory_hint_shard_count: u32,
    /// Stored at offset 16.
    pub frame_count: u64,
    /// Stored at offset 24.
    pub envelope_count: u64,
    /// Stored at offset 32.
    pub file_count: u64,
    /// Stored at offset 40.
    pub payload_block_count: u64,
    /// Stored at offset 48.
    pub tar_total_size: u64,
    /// Stored at offsets 56..88; `IndexRootHeader::empty` uses the SHA-256 of
    /// empty input here.
    pub content_sha256: [u8; 32],
    /// Byte offset of the shard table (offset 88); zero when the table is
    /// absent.
    pub shard_table_offset: u64,
    /// Byte offset of the directory hint shard table (offset 96); zero when
    /// the table is absent.
    pub directory_hint_shard_table_offset: u64,
    /// Stored at offset 104. The four `dictionary_*` fields are checked by
    /// `validate_dictionary_fields`, which is outside this view.
    pub dictionary_first_block: u64,
    /// Stored at offset 112.
    pub dictionary_data_block_count: u32,
    /// Stored at offset 116.
    pub dictionary_parity_block_count: u32,
    /// Stored at offset 120.
    pub dictionary_encrypted_size: u32,
    /// Stored at offset 124.
    pub dictionary_decompressed_size: u32,
}
86
/// A fully decoded index root: the fixed header followed by its two tables.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexRoot {
    /// Fixed-size header fields.
    pub header: IndexRootHeader,
    /// Rows of the shard table, in serialized order.
    pub shards: Vec<ShardEntry>,
    /// Rows of the directory hint shard table, in serialized order.
    pub directory_hint_shards: Vec<DirectoryHintShardEntry>,
}
93
/// One row of the `IndexRoot` shard table, locating a single `IndexShard`.
///
/// Serialized as `SHARD_ENTRY_LEN` bytes with the fields in declaration
/// order: two `u64`s at offsets 0 and 8, five `u32`s at 16..36, then the two
/// 8-byte hashes at 36..44 and 44..52.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ShardEntry {
    /// Index of the shard; must equal `IndexShardHeader::shard_index` of the
    /// shard it locates.
    pub shard_index: u64,
    pub first_block_index: u64,
    pub data_block_count: u32,
    pub parity_block_count: u32,
    pub encrypted_size: u32,
    pub decompressed_size: u32,
    /// Must equal `IndexShardHeader::file_count` of the shard it locates.
    pub file_count: u32,
    /// Lower interval key used when selecting candidate shards for a hash.
    pub first_path_hash: [u8; 8],
    /// Upper interval key used when selecting candidate shards for a hash.
    pub last_path_hash: [u8; 8],
}
106
/// One row of the `IndexRoot` directory hint shard table, locating a single
/// `DirectoryHintTable`.
///
/// Serialized as `DIRECTORY_HINT_SHARD_ENTRY_LEN` bytes with the fields in
/// declaration order: `u64` at 0, hashes at 8..16 and 16..24, `u64` at 24,
/// four `u32`s at 32..48, and a final `u64` at 48.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DirectoryHintShardEntry {
    /// Must equal `DirectoryHintTableHeader::hint_shard_index` of the table
    /// it locates.
    pub hint_shard_index: u64,
    pub first_dir_hash: [u8; 8],
    pub last_dir_hash: [u8; 8],
    pub first_block_index: u64,
    pub data_block_count: u32,
    pub parity_block_count: u32,
    pub encrypted_size: u32,
    pub decompressed_size: u32,
    /// Must equal `DirectoryHintTableHeader::entry_count` of the table it
    /// locates.
    pub entry_count: u64,
}
119
/// One row of an `IndexShard` envelope table.
///
/// `to_bytes` emits `ENVELOPE_ENTRY_LEN` bytes: fields in declaration order
/// occupy offsets 0..44 and the remaining bytes are left zero.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EnvelopeEntry {
    pub envelope_index: u64,
    pub first_block_index: u64,
    pub data_block_count: u32,
    pub parity_block_count: u32,
    pub encrypted_size: u32,
    pub plaintext_size: u32,
    pub first_frame_index: u64,
    pub frame_count: u32,
}
131
/// One row of an `IndexShard` frame table.
///
/// `to_bytes` emits `FRAME_ENTRY_LEN` bytes: fields in declaration order
/// occupy offsets 0..40 and the remaining bytes are left zero.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FrameEntry {
    pub frame_index: u64,
    pub envelope_index: u64,
    pub offset_in_envelope: u32,
    pub compressed_size: u32,
    pub decompressed_size: u32,
    pub flags: u32,
    pub tar_stream_offset: u64,
}
142
/// Decoded form of the fixed `INDEX_SHARD_HEADER_LEN`-byte header of an
/// `IndexShard`.
///
/// Serialized layout: magic at 0, `version` at 4, `shard_index` at 8, then
/// eight `u32` fields at 16..48 in declaration order; bytes 48..64 are
/// reserved and must be zero when parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexShardHeader {
    /// Format version; `IndexShard::parse` accepts only `1`.
    pub version: u32,
    /// Must match the `ShardEntry` used to locate this shard.
    pub shard_index: u64,
    /// Number of file rows; must be non-zero and match the locating
    /// `ShardEntry`.
    pub file_count: u32,
    /// Number of frame rows.
    pub frame_count: u32,
    /// Number of envelope rows.
    pub envelope_count: u32,
    /// Byte offset of the file table.
    pub file_table_offset: u32,
    /// Byte offset of the frame table.
    pub frame_table_offset: u32,
    /// Byte offset of the envelope table.
    pub envelope_table_offset: u32,
    /// Byte offset of the string pool; zero when the pool is empty.
    pub string_pool_offset: u32,
    /// Length of the string pool in bytes.
    pub string_pool_size: u32,
}
156
/// A fully decoded index shard: header, its three tables and the string pool.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexShard {
    /// Fixed-size header fields.
    pub header: IndexShardHeader,
    /// File rows, in serialized order.
    pub files: Vec<FileEntry>,
    /// Frame rows, in serialized order.
    pub frames: Vec<FrameEntry>,
    /// Envelope rows, in serialized order.
    pub envelopes: Vec<EnvelopeEntry>,
    /// Raw bytes of the string pool.
    pub string_pool: Vec<u8>,
    // Path bytes per file row, produced during `parse` by
    // `validate_index_shard_tables`; indexed in parallel with `files`.
    file_paths: Vec<Vec<u8>>,
    // Per-file value produced during `parse` by `validate_index_shard_tables`
    // and exposed through `tar_member_group_start`.
    file_tar_member_group_starts: Vec<u64>,
}
167
/// One row of an `IndexShard` file table.
///
/// `to_bytes` emits `FILE_ENTRY_LEN` bytes: fields in declaration order
/// occupy offsets 0..52 and the remaining bytes are left zero.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileEntry {
    /// Hash key of the path; file lookups compare it against the hash prefix
    /// of the requested path.
    pub path_hash: [u8; 8],
    // NOTE(review): `path_offset`/`path_length` presumably address the path
    // inside the shard's string pool; the resolving code is outside this view.
    pub path_offset: u32,
    pub path_length: u32,
    pub first_frame_index: u64,
    pub frame_count: u32,
    pub offset_in_first_frame_plaintext: u32,
    pub tar_member_group_size: u64,
    pub file_data_size: u64,
    pub flags: u32,
}
180
/// A fully decoded directory hint table: header, entry rows, the flat list of
/// shard row indexes, and the string pool.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DirectoryHintTable {
    /// Fixed-size header fields.
    pub header: DirectoryHintTableHeader,
    /// Entry rows, in serialized order.
    pub entries: Vec<DirectoryHintEntry>,
    /// Flat `u32` shard list that follows the entry table.
    pub shard_row_indexes: Vec<u32>,
    /// Raw bytes of the string pool.
    pub string_pool: Vec<u8>,
    // Path bytes per entry, produced during `parse` by
    // `validate_directory_hint_paths_and_lists`.
    entry_paths: Vec<Vec<u8>>,
}
189
/// Decoded form of the fixed `DIRECTORY_HINT_TABLE_LEN`-byte header of a
/// `DirectoryHintTable`.
///
/// Parsed layout: magic at 0, `version` at 4, then six `u64` fields at 8..56
/// in declaration order; bytes 56..72 are reserved and must be zero.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DirectoryHintTableHeader {
    /// Format version; `DirectoryHintTable::parse` accepts only `1`.
    pub version: u32,
    /// Must match the `DirectoryHintShardEntry` used to locate this table.
    pub hint_shard_index: u64,
    /// Number of entry rows; must be non-zero and match the locating entry.
    pub entry_count: u64,
    /// Byte offset of the entry table.
    pub entry_table_offset: u64,
    /// Byte offset of the shard list; checked for 4-byte alignment on parse.
    pub shard_list_offset: u64,
    /// Byte offset of the string pool; zero when the pool is empty.
    pub string_pool_offset: u64,
    /// Length of the string pool in bytes.
    pub string_pool_size: u64,
}
200
/// One row of a `DirectoryHintTable` entry table; rows are
/// `DIRECTORY_HINT_ENTRY_LEN` bytes apart in the serialized form.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DirectoryHintEntry {
    pub dir_hash: [u8; 8],
    // NOTE(review): `path_offset`/`path_length` presumably address the
    // directory path inside the table's string pool — confirm against the
    // validator, which is outside this view.
    pub path_offset: u64,
    pub path_length: u32,
    // NOTE(review): `shard_list_start_index`/`shard_count` presumably select
    // a run inside `DirectoryHintTable::shard_row_indexes` — confirm.
    pub shard_list_start_index: u32,
    pub shard_count: u32,
}
209
210impl IndexRoot {
211    pub fn parse(
212        bytes: &[u8],
213        has_dictionary: bool,
214        limits: MetadataLimits,
215    ) -> Result<Self, FormatError> {
216        let structure = "IndexRoot";
217        if bytes.len() < INDEX_ROOT_LEN {
218            return invalid(structure, "plaintext is shorter than fixed header");
219        }
220        expect_magic(structure, TZIR_MAGIC, read_array::<4>(bytes, 0, structure)?)?;
221        expect_zero(structure, slice(bytes, 128, 32, structure)?)?;
222
223        let header = IndexRootHeader {
224            version: read_u32(bytes, 4, structure)?,
225            shard_count: read_u32(bytes, 8, structure)?,
226            directory_hint_shard_count: read_u32(bytes, 12, structure)?,
227            frame_count: read_u64(bytes, 16, structure)?,
228            envelope_count: read_u64(bytes, 24, structure)?,
229            file_count: read_u64(bytes, 32, structure)?,
230            payload_block_count: read_u64(bytes, 40, structure)?,
231            tar_total_size: read_u64(bytes, 48, structure)?,
232            content_sha256: read_array::<32>(bytes, 56, structure)?,
233            shard_table_offset: read_u64(bytes, 88, structure)?,
234            directory_hint_shard_table_offset: read_u64(bytes, 96, structure)?,
235            dictionary_first_block: read_u64(bytes, 104, structure)?,
236            dictionary_data_block_count: read_u32(bytes, 112, structure)?,
237            dictionary_parity_block_count: read_u32(bytes, 116, structure)?,
238            dictionary_encrypted_size: read_u32(bytes, 120, structure)?,
239            dictionary_decompressed_size: read_u32(bytes, 124, structure)?,
240        };
241
242        if header.version != 1 {
243            return invalid(structure, "unsupported version");
244        }
245        if header.shard_count > limits.max_shard_count {
246            return invalid(structure, "shard count exceeds resource cap");
247        }
248        if header.directory_hint_shard_count > limits.max_directory_hint_shards {
249            return invalid(structure, "directory hint shard count exceeds resource cap");
250        }
251        validate_dictionary_fields(&header, has_dictionary, limits)?;
252
253        let mut cursor = INDEX_ROOT_LEN;
254        let shards = if header.shard_count == 0 {
255            if header.shard_table_offset != 0 {
256                return invalid(structure, "absent shard table has non-zero offset");
257            }
258            Vec::new()
259        } else {
260            expect_offset(structure, "shard table", header.shard_table_offset, cursor)?;
261            let count = to_usize(header.shard_count as u64, structure)?;
262            let bytes_len = checked_mul(count, SHARD_ENTRY_LEN, structure)?;
263            let table = slice(bytes, cursor, bytes_len, structure)?;
264            cursor = checked_add(cursor, bytes_len, structure)?;
265            parse_shard_entries(table, limits)?
266        };
267
268        let directory_hint_shards = if header.directory_hint_shard_count == 0 {
269            if header.directory_hint_shard_table_offset != 0 {
270                return invalid(
271                    structure,
272                    "absent directory hint shard table has non-zero offset",
273                );
274            }
275            Vec::new()
276        } else {
277            if header.shard_count == 0 {
278                return invalid(structure, "directory hints require at least one shard");
279            }
280            expect_offset(
281                structure,
282                "directory hint shard table",
283                header.directory_hint_shard_table_offset,
284                cursor,
285            )?;
286            let count = to_usize(header.directory_hint_shard_count as u64, structure)?;
287            let bytes_len = checked_mul(count, DIRECTORY_HINT_SHARD_ENTRY_LEN, structure)?;
288            let table = slice(bytes, cursor, bytes_len, structure)?;
289            cursor = checked_add(cursor, bytes_len, structure)?;
290            parse_directory_hint_shard_entries(table, limits)?
291        };
292
293        if bytes.len() != cursor {
294            return invalid(
295                structure,
296                "plaintext length does not match canonical cursor",
297            );
298        }
299        validate_index_root_totals(&header, &shards, has_dictionary)?;
300
301        Ok(Self {
302            header,
303            shards,
304            directory_hint_shards,
305        })
306    }
307
308    pub fn to_bytes(&self) -> Vec<u8> {
309        let mut header = self.header.clone();
310        header.shard_count = self.shards.len() as u32;
311        header.directory_hint_shard_count = self.directory_hint_shards.len() as u32;
312        header.shard_table_offset = if self.shards.is_empty() {
313            0
314        } else {
315            INDEX_ROOT_LEN as u64
316        };
317        header.directory_hint_shard_table_offset = if self.directory_hint_shards.is_empty() {
318            0
319        } else {
320            (INDEX_ROOT_LEN + self.shards.len() * SHARD_ENTRY_LEN) as u64
321        };
322
323        let mut bytes = Vec::with_capacity(
324            INDEX_ROOT_LEN
325                + self.shards.len() * SHARD_ENTRY_LEN
326                + self.directory_hint_shards.len() * DIRECTORY_HINT_SHARD_ENTRY_LEN,
327        );
328        bytes.extend_from_slice(&header.to_bytes());
329        for entry in &self.shards {
330            bytes.extend_from_slice(&entry.to_bytes());
331        }
332        for entry in &self.directory_hint_shards {
333            bytes.extend_from_slice(&entry.to_bytes());
334        }
335        bytes
336    }
337
338    pub fn candidate_shard_indexes_for_hash(
339        &self,
340        target_hash: [u8; 8],
341        scan_cap_per_direction: usize,
342    ) -> Result<Vec<usize>, FormatError> {
343        candidate_interval_indexes(
344            &self.shards,
345            target_hash,
346            scan_cap_per_direction,
347            |entry| entry.first_path_hash,
348            |entry| entry.last_path_hash,
349        )
350    }
351
352    pub fn candidate_shards_for_path(
353        &self,
354        normalized_path: &[u8],
355        limits: MetadataLimits,
356    ) -> Result<Vec<usize>, FormatError> {
357        self.candidate_shard_indexes_for_hash(
358            hash_prefix(normalized_path),
359            limits.max_hash_collision_shard_scan,
360        )
361    }
362}
363
364impl IndexRootHeader {
365    pub fn empty() -> Self {
366        Self {
367            version: 1,
368            shard_count: 0,
369            directory_hint_shard_count: 0,
370            frame_count: 0,
371            envelope_count: 0,
372            file_count: 0,
373            payload_block_count: 0,
374            tar_total_size: 0,
375            content_sha256: SHA256_EMPTY,
376            shard_table_offset: 0,
377            directory_hint_shard_table_offset: 0,
378            dictionary_first_block: 0,
379            dictionary_data_block_count: 0,
380            dictionary_parity_block_count: 0,
381            dictionary_encrypted_size: 0,
382            dictionary_decompressed_size: 0,
383        }
384    }
385
386    pub fn to_bytes(&self) -> [u8; INDEX_ROOT_LEN] {
387        let mut bytes = [0u8; INDEX_ROOT_LEN];
388        bytes[0..4].copy_from_slice(&TZIR_MAGIC);
389        write_u32(&mut bytes, 4, self.version);
390        write_u32(&mut bytes, 8, self.shard_count);
391        write_u32(&mut bytes, 12, self.directory_hint_shard_count);
392        write_u64(&mut bytes, 16, self.frame_count);
393        write_u64(&mut bytes, 24, self.envelope_count);
394        write_u64(&mut bytes, 32, self.file_count);
395        write_u64(&mut bytes, 40, self.payload_block_count);
396        write_u64(&mut bytes, 48, self.tar_total_size);
397        bytes[56..88].copy_from_slice(&self.content_sha256);
398        write_u64(&mut bytes, 88, self.shard_table_offset);
399        write_u64(&mut bytes, 96, self.directory_hint_shard_table_offset);
400        write_u64(&mut bytes, 104, self.dictionary_first_block);
401        write_u32(&mut bytes, 112, self.dictionary_data_block_count);
402        write_u32(&mut bytes, 116, self.dictionary_parity_block_count);
403        write_u32(&mut bytes, 120, self.dictionary_encrypted_size);
404        write_u32(&mut bytes, 124, self.dictionary_decompressed_size);
405        bytes
406    }
407}
408
409impl ShardEntry {
410    pub fn to_bytes(&self) -> [u8; SHARD_ENTRY_LEN] {
411        let mut bytes = [0u8; SHARD_ENTRY_LEN];
412        write_u64(&mut bytes, 0, self.shard_index);
413        write_u64(&mut bytes, 8, self.first_block_index);
414        write_u32(&mut bytes, 16, self.data_block_count);
415        write_u32(&mut bytes, 20, self.parity_block_count);
416        write_u32(&mut bytes, 24, self.encrypted_size);
417        write_u32(&mut bytes, 28, self.decompressed_size);
418        write_u32(&mut bytes, 32, self.file_count);
419        bytes[36..44].copy_from_slice(&self.first_path_hash);
420        bytes[44..52].copy_from_slice(&self.last_path_hash);
421        bytes
422    }
423}
424
425impl DirectoryHintShardEntry {
426    pub fn to_bytes(&self) -> [u8; DIRECTORY_HINT_SHARD_ENTRY_LEN] {
427        let mut bytes = [0u8; DIRECTORY_HINT_SHARD_ENTRY_LEN];
428        write_u64(&mut bytes, 0, self.hint_shard_index);
429        bytes[8..16].copy_from_slice(&self.first_dir_hash);
430        bytes[16..24].copy_from_slice(&self.last_dir_hash);
431        write_u64(&mut bytes, 24, self.first_block_index);
432        write_u32(&mut bytes, 32, self.data_block_count);
433        write_u32(&mut bytes, 36, self.parity_block_count);
434        write_u32(&mut bytes, 40, self.encrypted_size);
435        write_u32(&mut bytes, 44, self.decompressed_size);
436        write_u64(&mut bytes, 48, self.entry_count);
437        bytes
438    }
439}
440
441impl IndexShard {
442    pub fn parse(
443        bytes: &[u8],
444        locating_shard: &ShardEntry,
445        limits: MetadataLimits,
446    ) -> Result<Self, FormatError> {
447        let structure = "IndexShard";
448        if bytes.len() < INDEX_SHARD_HEADER_LEN {
449            return invalid(structure, "plaintext is shorter than fixed header");
450        }
451        expect_magic(structure, TZIS_MAGIC, read_array::<4>(bytes, 0, structure)?)?;
452        expect_zero(structure, slice(bytes, 48, 16, structure)?)?;
453
454        let header = IndexShardHeader {
455            version: read_u32(bytes, 4, structure)?,
456            shard_index: read_u64(bytes, 8, structure)?,
457            file_count: read_u32(bytes, 16, structure)?,
458            frame_count: read_u32(bytes, 20, structure)?,
459            envelope_count: read_u32(bytes, 24, structure)?,
460            file_table_offset: read_u32(bytes, 28, structure)?,
461            frame_table_offset: read_u32(bytes, 32, structure)?,
462            envelope_table_offset: read_u32(bytes, 36, structure)?,
463            string_pool_offset: read_u32(bytes, 40, structure)?,
464            string_pool_size: read_u32(bytes, 44, structure)?,
465        };
466
467        if header.version != 1 {
468            return invalid(structure, "unsupported version");
469        }
470        if header.file_count == 0 {
471            return invalid(structure, "index shard must contain at least one file");
472        }
473        if header.file_count > limits.max_files_per_index_shard {
474            return invalid(structure, "file count exceeds resource cap");
475        }
476        if header.shard_index != locating_shard.shard_index {
477            return invalid(structure, "shard index does not match locating ShardEntry");
478        }
479        if header.file_count != locating_shard.file_count {
480            return invalid(structure, "file count does not match locating ShardEntry");
481        }
482
483        let mut cursor = INDEX_SHARD_HEADER_LEN;
484        let files = parse_counted_table(
485            bytes,
486            structure,
487            "file table",
488            header.file_count as u64,
489            header.file_table_offset as u64,
490            FILE_ENTRY_LEN,
491            &mut cursor,
492            parse_file_entry,
493        )?;
494        let frames = parse_counted_table(
495            bytes,
496            structure,
497            "frame table",
498            header.frame_count as u64,
499            header.frame_table_offset as u64,
500            FRAME_ENTRY_LEN,
501            &mut cursor,
502            parse_frame_entry,
503        )?;
504        let envelopes = parse_counted_table(
505            bytes,
506            structure,
507            "envelope table",
508            header.envelope_count as u64,
509            header.envelope_table_offset as u64,
510            ENVELOPE_ENTRY_LEN,
511            &mut cursor,
512            parse_envelope_entry,
513        )?;
514        let string_pool = if header.string_pool_size == 0 {
515            if header.string_pool_offset != 0 {
516                return invalid(structure, "absent string pool has non-zero offset");
517            }
518            Vec::new()
519        } else {
520            expect_offset(
521                structure,
522                "string pool",
523                header.string_pool_offset as u64,
524                cursor,
525            )?;
526            let len = header.string_pool_size as usize;
527            let pool = slice(bytes, cursor, len, structure)?.to_vec();
528            cursor = checked_add(cursor, len, structure)?;
529            pool
530        };
531        if bytes.len() != cursor {
532            return invalid(
533                structure,
534                "plaintext length does not match canonical cursor",
535            );
536        }
537
538        let (file_paths, file_tar_member_group_starts) = validate_index_shard_tables(
539            &files,
540            &frames,
541            &envelopes,
542            &string_pool,
543            locating_shard,
544            limits,
545        )?;
546
547        Ok(Self {
548            header,
549            files,
550            frames,
551            envelopes,
552            string_pool,
553            file_paths,
554            file_tar_member_group_starts,
555        })
556    }
557
558    pub fn to_bytes(&self) -> Vec<u8> {
559        let mut header = self.header.clone();
560        header.file_count = self.files.len() as u32;
561        header.frame_count = self.frames.len() as u32;
562        header.envelope_count = self.envelopes.len() as u32;
563
564        let mut cursor = INDEX_SHARD_HEADER_LEN;
565        header.file_table_offset = table_offset(self.files.len(), cursor);
566        cursor += self.files.len() * FILE_ENTRY_LEN;
567        header.frame_table_offset = table_offset(self.frames.len(), cursor);
568        cursor += self.frames.len() * FRAME_ENTRY_LEN;
569        header.envelope_table_offset = table_offset(self.envelopes.len(), cursor);
570        cursor += self.envelopes.len() * ENVELOPE_ENTRY_LEN;
571        header.string_pool_size = self.string_pool.len() as u32;
572        header.string_pool_offset = table_offset(self.string_pool.len(), cursor);
573
574        let mut bytes = Vec::with_capacity(cursor + self.string_pool.len());
575        bytes.extend_from_slice(&header.to_bytes());
576        for entry in &self.files {
577            bytes.extend_from_slice(&entry.to_bytes());
578        }
579        for entry in &self.frames {
580            bytes.extend_from_slice(&entry.to_bytes());
581        }
582        for entry in &self.envelopes {
583            bytes.extend_from_slice(&entry.to_bytes());
584        }
585        bytes.extend_from_slice(&self.string_pool);
586        bytes
587    }
588
589    pub fn file_path(&self, file_index: usize) -> Option<&[u8]> {
590        self.file_paths.get(file_index).map(Vec::as_slice)
591    }
592
593    pub fn tar_member_group_start(&self, file_index: usize) -> Option<u64> {
594        self.file_tar_member_group_starts.get(file_index).copied()
595    }
596
597    pub fn lookup_file_index(&self, normalized_path: &[u8]) -> Option<usize> {
598        let target_hash = hash_prefix(normalized_path);
599        let lower = self.lower_bound_file_key(target_hash, normalized_path);
600
601        let mut best = None;
602        for idx in lower..self.files.len() {
603            let file = &self.files[idx];
604            if file.path_hash != target_hash || self.file_paths[idx].as_slice() != normalized_path {
605                break;
606            }
607            best = Some(idx);
608        }
609        best
610    }
611
612    fn lower_bound_file_key(&self, target_hash: [u8; 8], target_path: &[u8]) -> usize {
613        let mut low = 0usize;
614        let mut high = self.files.len();
615        while low < high {
616            let mid = low + (high - low) / 2;
617            let key_is_less = self.files[mid].path_hash < target_hash
618                || (self.files[mid].path_hash == target_hash
619                    && self.file_paths[mid].as_slice() < target_path);
620            if key_is_less {
621                low = mid + 1;
622            } else {
623                high = mid;
624            }
625        }
626        low
627    }
628}
629
630impl IndexShardHeader {
631    pub fn to_bytes(&self) -> [u8; INDEX_SHARD_HEADER_LEN] {
632        let mut bytes = [0u8; INDEX_SHARD_HEADER_LEN];
633        bytes[0..4].copy_from_slice(&TZIS_MAGIC);
634        write_u32(&mut bytes, 4, self.version);
635        write_u64(&mut bytes, 8, self.shard_index);
636        write_u32(&mut bytes, 16, self.file_count);
637        write_u32(&mut bytes, 20, self.frame_count);
638        write_u32(&mut bytes, 24, self.envelope_count);
639        write_u32(&mut bytes, 28, self.file_table_offset);
640        write_u32(&mut bytes, 32, self.frame_table_offset);
641        write_u32(&mut bytes, 36, self.envelope_table_offset);
642        write_u32(&mut bytes, 40, self.string_pool_offset);
643        write_u32(&mut bytes, 44, self.string_pool_size);
644        bytes
645    }
646}
647
648impl FileEntry {
649    pub fn to_bytes(&self) -> [u8; FILE_ENTRY_LEN] {
650        let mut bytes = [0u8; FILE_ENTRY_LEN];
651        bytes[0..8].copy_from_slice(&self.path_hash);
652        write_u32(&mut bytes, 8, self.path_offset);
653        write_u32(&mut bytes, 12, self.path_length);
654        write_u64(&mut bytes, 16, self.first_frame_index);
655        write_u32(&mut bytes, 24, self.frame_count);
656        write_u32(&mut bytes, 28, self.offset_in_first_frame_plaintext);
657        write_u64(&mut bytes, 32, self.tar_member_group_size);
658        write_u64(&mut bytes, 40, self.file_data_size);
659        write_u32(&mut bytes, 48, self.flags);
660        bytes
661    }
662}
663
664impl FrameEntry {
665    pub fn to_bytes(&self) -> [u8; FRAME_ENTRY_LEN] {
666        let mut bytes = [0u8; FRAME_ENTRY_LEN];
667        write_u64(&mut bytes, 0, self.frame_index);
668        write_u64(&mut bytes, 8, self.envelope_index);
669        write_u32(&mut bytes, 16, self.offset_in_envelope);
670        write_u32(&mut bytes, 20, self.compressed_size);
671        write_u32(&mut bytes, 24, self.decompressed_size);
672        write_u32(&mut bytes, 28, self.flags);
673        write_u64(&mut bytes, 32, self.tar_stream_offset);
674        bytes
675    }
676}
677
678impl EnvelopeEntry {
679    pub fn to_bytes(&self) -> [u8; ENVELOPE_ENTRY_LEN] {
680        let mut bytes = [0u8; ENVELOPE_ENTRY_LEN];
681        write_u64(&mut bytes, 0, self.envelope_index);
682        write_u64(&mut bytes, 8, self.first_block_index);
683        write_u32(&mut bytes, 16, self.data_block_count);
684        write_u32(&mut bytes, 20, self.parity_block_count);
685        write_u32(&mut bytes, 24, self.encrypted_size);
686        write_u32(&mut bytes, 28, self.plaintext_size);
687        write_u64(&mut bytes, 32, self.first_frame_index);
688        write_u32(&mut bytes, 40, self.frame_count);
689        bytes
690    }
691}
692
693impl DirectoryHintTable {
694    pub fn parse(
695        bytes: &[u8],
696        locating_shard: &DirectoryHintShardEntry,
697        index_root_shard_count: u32,
698        limits: MetadataLimits,
699    ) -> Result<Self, FormatError> {
700        let structure = "DirectoryHintTable";
701        if bytes.len() < DIRECTORY_HINT_TABLE_LEN {
702            return invalid(structure, "plaintext is shorter than fixed header");
703        }
704        expect_magic(structure, TZDH_MAGIC, read_array::<4>(bytes, 0, structure)?)?;
705        expect_zero(structure, slice(bytes, 56, 16, structure)?)?;
706
707        let header = DirectoryHintTableHeader {
708            version: read_u32(bytes, 4, structure)?,
709            hint_shard_index: read_u64(bytes, 8, structure)?,
710            entry_count: read_u64(bytes, 16, structure)?,
711            entry_table_offset: read_u64(bytes, 24, structure)?,
712            shard_list_offset: read_u64(bytes, 32, structure)?,
713            string_pool_offset: read_u64(bytes, 40, structure)?,
714            string_pool_size: read_u64(bytes, 48, structure)?,
715        };
716        if header.version != 1 {
717            return invalid(structure, "unsupported version");
718        }
719        if header.hint_shard_index != locating_shard.hint_shard_index {
720            return invalid(
721                structure,
722                "hint shard index does not match locating DirectoryHintShardEntry",
723            );
724        }
725        if header.entry_count != locating_shard.entry_count {
726            return invalid(
727                structure,
728                "entry count does not match locating DirectoryHintShardEntry",
729            );
730        }
731        if header.entry_count == 0 {
732            return invalid(structure, "located directory hint shard is empty");
733        }
734        if header.entry_count > limits.max_entries_per_directory_hint_shard {
735            return invalid(structure, "entry count exceeds resource cap");
736        }
737
738        let entry_count = to_usize(header.entry_count, structure)?;
739        expect_offset(
740            structure,
741            "entry table",
742            header.entry_table_offset,
743            DIRECTORY_HINT_TABLE_LEN,
744        )?;
745        let entry_bytes_len = checked_mul(entry_count, DIRECTORY_HINT_ENTRY_LEN, structure)?;
746        let entries_end = checked_add(DIRECTORY_HINT_TABLE_LEN, entry_bytes_len, structure)?;
747        expect_offset(
748            structure,
749            "shard list",
750            header.shard_list_offset,
751            entries_end,
752        )?;
753        if header.shard_list_offset % 4 != 0 {
754            return invalid(structure, "shard list is not 4-byte aligned");
755        }
756
757        let entry_bytes = slice(bytes, DIRECTORY_HINT_TABLE_LEN, entry_bytes_len, structure)?;
758        let entries = parse_directory_hint_entries(entry_bytes)?;
759        let shard_list_len = validate_directory_hint_entries(
760            &entries,
761            bytes,
762            &header,
763            locating_shard,
764            index_root_shard_count,
765        )?;
766        let shard_list_offset = to_usize(header.shard_list_offset, structure)?;
767        let shard_list_bytes_len = checked_mul(shard_list_len, 4, structure)?;
768        let shard_list_end = checked_add(shard_list_offset, shard_list_bytes_len, structure)?;
769        let shard_list_bytes = slice(bytes, shard_list_offset, shard_list_bytes_len, structure)?;
770        let shard_row_indexes = parse_u32_array(shard_list_bytes, structure)?;
771
772        let string_pool = if header.string_pool_size == 0 {
773            if header.string_pool_offset != 0 {
774                return invalid(structure, "absent string pool has non-zero offset");
775            }
776            Vec::new()
777        } else {
778            expect_offset(
779                structure,
780                "string pool",
781                header.string_pool_offset,
782                shard_list_end,
783            )?;
784            let offset = to_usize(header.string_pool_offset, structure)?;
785            let size = to_usize(header.string_pool_size, structure)?;
786            slice(bytes, offset, size, structure)?.to_vec()
787        };
788        let final_cursor = if header.string_pool_size == 0 {
789            shard_list_end
790        } else {
791            checked_add(
792                to_usize(header.string_pool_offset, structure)?,
793                to_usize(header.string_pool_size, structure)?,
794                structure,
795            )?
796        };
797        if bytes.len() != final_cursor {
798            return invalid(
799                structure,
800                "plaintext length does not match canonical cursor",
801            );
802        }
803
804        let entry_paths = validate_directory_hint_paths_and_lists(
805            &entries,
806            &shard_row_indexes,
807            &string_pool,
808            locating_shard,
809            index_root_shard_count,
810            limits.max_path_length,
811        )?;
812
813        Ok(Self {
814            header,
815            entries,
816            shard_row_indexes,
817            string_pool,
818            entry_paths,
819        })
820    }
821
822    pub fn to_bytes(&self) -> Vec<u8> {
823        let mut header = self.header.clone();
824        header.entry_count = self.entries.len() as u64;
825        header.entry_table_offset = if self.entries.is_empty() {
826            0
827        } else {
828            DIRECTORY_HINT_TABLE_LEN as u64
829        };
830        header.shard_list_offset = if self.entries.is_empty() {
831            0
832        } else {
833            (DIRECTORY_HINT_TABLE_LEN + self.entries.len() * DIRECTORY_HINT_ENTRY_LEN) as u64
834        };
835        header.string_pool_size = self.string_pool.len() as u64;
836        header.string_pool_offset = if self.string_pool.is_empty() {
837            0
838        } else {
839            header.shard_list_offset + (self.shard_row_indexes.len() as u64) * 4
840        };
841
842        let mut bytes = Vec::with_capacity(
843            DIRECTORY_HINT_TABLE_LEN
844                + self.entries.len() * DIRECTORY_HINT_ENTRY_LEN
845                + self.shard_row_indexes.len() * 4
846                + self.string_pool.len(),
847        );
848        bytes.extend_from_slice(&header.to_bytes());
849        for entry in &self.entries {
850            bytes.extend_from_slice(&entry.to_bytes());
851        }
852        if !self.entries.is_empty() {
853            for row in &self.shard_row_indexes {
854                let mut raw = [0u8; 4];
855                write_u32(&mut raw, 0, *row);
856                bytes.extend_from_slice(&raw);
857            }
858        }
859        bytes.extend_from_slice(&self.string_pool);
860        bytes
861    }
862
863    pub fn entry_path(&self, entry_index: usize) -> Option<&[u8]> {
864        self.entry_paths.get(entry_index).map(Vec::as_slice)
865    }
866
867    pub fn lookup_directory_index(&self, normalized_dir_path: &[u8]) -> Option<usize> {
868        let target_hash = hash_prefix(normalized_dir_path);
869        let lower = self.lower_bound_directory_key(target_hash, normalized_dir_path);
870        for idx in lower..self.entries.len() {
871            let entry = &self.entries[idx];
872            if entry.dir_hash != target_hash
873                || self.entry_paths[idx].as_slice() != normalized_dir_path
874            {
875                break;
876            }
877            return Some(idx);
878        }
879        None
880    }
881
882    fn lower_bound_directory_key(&self, target_hash: [u8; 8], target_path: &[u8]) -> usize {
883        let mut low = 0usize;
884        let mut high = self.entries.len();
885        while low < high {
886            let mid = low + (high - low) / 2;
887            let key_is_less = self.entries[mid].dir_hash < target_hash
888                || (self.entries[mid].dir_hash == target_hash
889                    && self.entry_paths[mid].as_slice() < target_path);
890            if key_is_less {
891                low = mid + 1;
892            } else {
893                high = mid;
894            }
895        }
896        low
897    }
898
899    pub fn shard_rows_for_entry(&self, entry_index: usize) -> Option<&[u32]> {
900        let entry = self.entries.get(entry_index)?;
901        let start = entry.shard_list_start_index as usize;
902        let end = start.checked_add(entry.shard_count as usize)?;
903        self.shard_row_indexes.get(start..end)
904    }
905}
906
907impl DirectoryHintTableHeader {
908    pub fn to_bytes(&self) -> [u8; DIRECTORY_HINT_TABLE_LEN] {
909        let mut bytes = [0u8; DIRECTORY_HINT_TABLE_LEN];
910        bytes[0..4].copy_from_slice(&TZDH_MAGIC);
911        write_u32(&mut bytes, 4, self.version);
912        write_u64(&mut bytes, 8, self.hint_shard_index);
913        write_u64(&mut bytes, 16, self.entry_count);
914        write_u64(&mut bytes, 24, self.entry_table_offset);
915        write_u64(&mut bytes, 32, self.shard_list_offset);
916        write_u64(&mut bytes, 40, self.string_pool_offset);
917        write_u64(&mut bytes, 48, self.string_pool_size);
918        bytes
919    }
920}
921
922impl DirectoryHintEntry {
923    pub fn to_bytes(&self) -> [u8; DIRECTORY_HINT_ENTRY_LEN] {
924        let mut bytes = [0u8; DIRECTORY_HINT_ENTRY_LEN];
925        bytes[0..8].copy_from_slice(&self.dir_hash);
926        write_u64(&mut bytes, 8, self.path_offset);
927        write_u32(&mut bytes, 16, self.path_length);
928        write_u32(&mut bytes, 24, self.shard_list_start_index);
929        write_u32(&mut bytes, 28, self.shard_count);
930        bytes
931    }
932}
933
934pub fn hash_prefix(bytes: &[u8]) -> [u8; 8] {
935    let digest = Sha256::digest(bytes);
936    let mut out = [0u8; 8];
937    out.copy_from_slice(&digest[..8]);
938    out
939}
940
941pub fn normalize_lookup_file_path(
942    path: &str,
943    max_path_length: u32,
944) -> Result<Vec<u8>, FormatError> {
945    let normalized = path.nfc().collect::<String>();
946    validate_file_path_bytes(normalized.as_bytes(), max_path_length)?;
947    Ok(normalized.into_bytes())
948}
949
950pub fn normalize_lookup_directory_path(
951    path: &str,
952    max_path_length: u32,
953) -> Result<Vec<u8>, FormatError> {
954    let trimmed = path.strip_suffix('/').unwrap_or(path);
955    let normalized = trimmed.nfc().collect::<String>();
956    validate_directory_path_bytes(normalized.as_bytes(), max_path_length)?;
957    Ok(normalized.into_bytes())
958}
959
/// Reports whether `file_path` lies underneath `directory_path`.
///
/// An empty `directory_path` is an ancestor of every path. Otherwise `file_path` must start
/// with `directory_path` immediately followed by a `/` separator, so a path equal to the
/// directory itself, or one that merely shares a name prefix, is not a descendant.
pub fn is_directory_ancestor(directory_path: &[u8], file_path: &[u8]) -> bool {
    if directory_path.is_empty() {
        return true;
    }
    match file_path.strip_prefix(directory_path) {
        Some(remainder) => remainder.first() == Some(&b'/'),
        None => false,
    }
}
968
969fn parse_shard_entries(
970    bytes: &[u8],
971    limits: MetadataLimits,
972) -> Result<Vec<ShardEntry>, FormatError> {
973    let mut entries = Vec::with_capacity(bytes.len() / SHARD_ENTRY_LEN);
974    let mut seen_indexes = HashSet::new();
975    for chunk in bytes.chunks_exact(SHARD_ENTRY_LEN) {
976        let entry = ShardEntry {
977            shard_index: read_u64(chunk, 0, "ShardEntry")?,
978            first_block_index: read_u64(chunk, 8, "ShardEntry")?,
979            data_block_count: read_u32(chunk, 16, "ShardEntry")?,
980            parity_block_count: read_u32(chunk, 20, "ShardEntry")?,
981            encrypted_size: read_u32(chunk, 24, "ShardEntry")?,
982            decompressed_size: read_u32(chunk, 28, "ShardEntry")?,
983            file_count: read_u32(chunk, 32, "ShardEntry")?,
984            first_path_hash: read_array::<8>(chunk, 36, "ShardEntry")?,
985            last_path_hash: read_array::<8>(chunk, 44, "ShardEntry")?,
986        };
987        if entry.file_count == 0 {
988            return invalid("ShardEntry", "file count is zero");
989        }
990        if entry.decompressed_size == 0 {
991            return invalid("ShardEntry", "decompressed size is zero");
992        }
993        validate_encrypted_extent(
994            "ShardEntry",
995            entry.data_block_count,
996            entry.encrypted_size,
997            limits.block_size,
998        )?;
999        validate_fec_class_extent(
1000            "ShardEntry",
1001            entry.data_block_count,
1002            entry.parity_block_count,
1003            limits.max_index_data_shards,
1004            limits.max_index_parity_shards,
1005        )?;
1006        if entry.first_path_hash > entry.last_path_hash {
1007            return invalid("ShardEntry", "first hash is greater than last hash");
1008        }
1009        if !seen_indexes.insert(entry.shard_index) {
1010            return invalid("ShardEntry", "duplicate shard index");
1011        }
1012        if let Some(previous) = entries.last() {
1013            let previous: &ShardEntry = previous;
1014            if shard_entry_sort_key(previous) >= shard_entry_sort_key(&entry) {
1015                return invalid("IndexRoot", "ShardEntry rows are not sorted");
1016            }
1017            if previous.last_path_hash > entry.first_path_hash {
1018                return invalid("IndexRoot", "ShardEntry hash ranges overlap out of order");
1019            }
1020        }
1021        entries.push(entry);
1022    }
1023    Ok(entries)
1024}
1025
1026fn parse_directory_hint_shard_entries(
1027    bytes: &[u8],
1028    limits: MetadataLimits,
1029) -> Result<Vec<DirectoryHintShardEntry>, FormatError> {
1030    let mut entries = Vec::with_capacity(bytes.len() / DIRECTORY_HINT_SHARD_ENTRY_LEN);
1031    let mut seen_indexes = HashSet::new();
1032    for chunk in bytes.chunks_exact(DIRECTORY_HINT_SHARD_ENTRY_LEN) {
1033        let entry = DirectoryHintShardEntry {
1034            hint_shard_index: read_u64(chunk, 0, "DirectoryHintShardEntry")?,
1035            first_dir_hash: read_array::<8>(chunk, 8, "DirectoryHintShardEntry")?,
1036            last_dir_hash: read_array::<8>(chunk, 16, "DirectoryHintShardEntry")?,
1037            first_block_index: read_u64(chunk, 24, "DirectoryHintShardEntry")?,
1038            data_block_count: read_u32(chunk, 32, "DirectoryHintShardEntry")?,
1039            parity_block_count: read_u32(chunk, 36, "DirectoryHintShardEntry")?,
1040            encrypted_size: read_u32(chunk, 40, "DirectoryHintShardEntry")?,
1041            decompressed_size: read_u32(chunk, 44, "DirectoryHintShardEntry")?,
1042            entry_count: read_u64(chunk, 48, "DirectoryHintShardEntry")?,
1043        };
1044        if entry.entry_count == 0 {
1045            return invalid("DirectoryHintShardEntry", "entry count is zero");
1046        }
1047        if entry.decompressed_size == 0 {
1048            return invalid("DirectoryHintShardEntry", "decompressed size is zero");
1049        }
1050        validate_encrypted_extent(
1051            "DirectoryHintShardEntry",
1052            entry.data_block_count,
1053            entry.encrypted_size,
1054            limits.block_size,
1055        )?;
1056        validate_fec_class_extent(
1057            "DirectoryHintShardEntry",
1058            entry.data_block_count,
1059            entry.parity_block_count,
1060            limits.max_index_data_shards,
1061            limits.max_index_parity_shards,
1062        )?;
1063        if entry.first_dir_hash > entry.last_dir_hash {
1064            return invalid(
1065                "DirectoryHintShardEntry",
1066                "first hash is greater than last hash",
1067            );
1068        }
1069        if !seen_indexes.insert(entry.hint_shard_index) {
1070            return invalid("DirectoryHintShardEntry", "duplicate hint shard index");
1071        }
1072        if let Some(previous) = entries.last() {
1073            let previous: &DirectoryHintShardEntry = previous;
1074            if directory_hint_shard_sort_key(previous) >= directory_hint_shard_sort_key(&entry) {
1075                return invalid("IndexRoot", "DirectoryHintShardEntry rows are not sorted");
1076            }
1077            if previous.last_dir_hash > entry.first_dir_hash {
1078                return invalid(
1079                    "IndexRoot",
1080                    "DirectoryHintShardEntry hash ranges overlap out of order",
1081                );
1082            }
1083        }
1084        entries.push(entry);
1085    }
1086    Ok(entries)
1087}
1088
1089fn parse_file_entry(bytes: &[u8]) -> Result<FileEntry, FormatError> {
1090    expect_zero("FileEntry", slice(bytes, 52, 4, "FileEntry")?)?;
1091    Ok(FileEntry {
1092        path_hash: read_array::<8>(bytes, 0, "FileEntry")?,
1093        path_offset: read_u32(bytes, 8, "FileEntry")?,
1094        path_length: read_u32(bytes, 12, "FileEntry")?,
1095        first_frame_index: read_u64(bytes, 16, "FileEntry")?,
1096        frame_count: read_u32(bytes, 24, "FileEntry")?,
1097        offset_in_first_frame_plaintext: read_u32(bytes, 28, "FileEntry")?,
1098        tar_member_group_size: read_u64(bytes, 32, "FileEntry")?,
1099        file_data_size: read_u64(bytes, 40, "FileEntry")?,
1100        flags: read_u32(bytes, 48, "FileEntry")?,
1101    })
1102}
1103
1104fn parse_frame_entry(bytes: &[u8]) -> Result<FrameEntry, FormatError> {
1105    expect_zero("FrameEntry", slice(bytes, 40, 4, "FrameEntry")?)?;
1106    Ok(FrameEntry {
1107        frame_index: read_u64(bytes, 0, "FrameEntry")?,
1108        envelope_index: read_u64(bytes, 8, "FrameEntry")?,
1109        offset_in_envelope: read_u32(bytes, 16, "FrameEntry")?,
1110        compressed_size: read_u32(bytes, 20, "FrameEntry")?,
1111        decompressed_size: read_u32(bytes, 24, "FrameEntry")?,
1112        flags: read_u32(bytes, 28, "FrameEntry")?,
1113        tar_stream_offset: read_u64(bytes, 32, "FrameEntry")?,
1114    })
1115}
1116
1117fn parse_envelope_entry(bytes: &[u8]) -> Result<EnvelopeEntry, FormatError> {
1118    expect_zero("EnvelopeEntry", slice(bytes, 44, 4, "EnvelopeEntry")?)?;
1119    Ok(EnvelopeEntry {
1120        envelope_index: read_u64(bytes, 0, "EnvelopeEntry")?,
1121        first_block_index: read_u64(bytes, 8, "EnvelopeEntry")?,
1122        data_block_count: read_u32(bytes, 16, "EnvelopeEntry")?,
1123        parity_block_count: read_u32(bytes, 20, "EnvelopeEntry")?,
1124        encrypted_size: read_u32(bytes, 24, "EnvelopeEntry")?,
1125        plaintext_size: read_u32(bytes, 28, "EnvelopeEntry")?,
1126        first_frame_index: read_u64(bytes, 32, "EnvelopeEntry")?,
1127        frame_count: read_u32(bytes, 40, "EnvelopeEntry")?,
1128    })
1129}
1130
1131fn parse_directory_hint_entries(bytes: &[u8]) -> Result<Vec<DirectoryHintEntry>, FormatError> {
1132    let mut entries = Vec::with_capacity(bytes.len() / DIRECTORY_HINT_ENTRY_LEN);
1133    for chunk in bytes.chunks_exact(DIRECTORY_HINT_ENTRY_LEN) {
1134        expect_zero(
1135            "DirectoryHintEntry",
1136            slice(chunk, 20, 4, "DirectoryHintEntry")?,
1137        )?;
1138        expect_zero(
1139            "DirectoryHintEntry",
1140            slice(chunk, 32, 8, "DirectoryHintEntry")?,
1141        )?;
1142        entries.push(DirectoryHintEntry {
1143            dir_hash: read_array::<8>(chunk, 0, "DirectoryHintEntry")?,
1144            path_offset: read_u64(chunk, 8, "DirectoryHintEntry")?,
1145            path_length: read_u32(chunk, 16, "DirectoryHintEntry")?,
1146            shard_list_start_index: read_u32(chunk, 24, "DirectoryHintEntry")?,
1147            shard_count: read_u32(chunk, 28, "DirectoryHintEntry")?,
1148        });
1149    }
1150    Ok(entries)
1151}
1152
1153fn validate_index_root_totals(
1154    header: &IndexRootHeader,
1155    shards: &[ShardEntry],
1156    has_dictionary: bool,
1157) -> Result<(), FormatError> {
1158    if shards.is_empty() {
1159        if header.file_count != 0
1160            || header.frame_count != 0
1161            || header.envelope_count != 0
1162            || header.payload_block_count != 0
1163            || header.tar_total_size != 0
1164        {
1165            return invalid(
1166                "IndexRoot",
1167                "empty shard table has non-empty archive totals",
1168            );
1169        }
1170        if header.content_sha256 != SHA256_EMPTY {
1171            return invalid(
1172                "IndexRoot",
1173                "empty archive content hash is not SHA-256(empty)",
1174            );
1175        }
1176        if has_dictionary || !index_root_dictionary_fields_are_zero(header) {
1177            return invalid("IndexRoot", "empty archive cannot use dictionary");
1178        }
1179        return Ok(());
1180    }
1181
1182    let mut sum = 0u64;
1183    for shard in shards {
1184        sum = sum.checked_add(shard.file_count as u64).ok_or(
1185            FormatError::MetadataArithmeticOverflow {
1186                structure: "IndexRoot",
1187            },
1188        )?;
1189    }
1190    if sum != header.file_count {
1191        return invalid(
1192            "IndexRoot",
1193            "file_count does not equal sum of ShardEntry rows",
1194        );
1195    }
1196    Ok(())
1197}
1198
1199fn validate_dictionary_fields(
1200    header: &IndexRootHeader,
1201    has_dictionary: bool,
1202    limits: MetadataLimits,
1203) -> Result<(), FormatError> {
1204    if !has_dictionary {
1205        if !index_root_dictionary_fields_are_zero(header) {
1206            return invalid(
1207                "IndexRoot",
1208                "dictionary fields are non-zero while has_dictionary is false",
1209            );
1210        }
1211        return Ok(());
1212    }
1213
1214    if header.dictionary_data_block_count == 0 {
1215        return invalid(
1216            "IndexRoot",
1217            "dictionary data block count is zero while has_dictionary is true",
1218        );
1219    }
1220    if header.dictionary_first_block == 0
1221        || header.dictionary_encrypted_size == 0
1222        || header.dictionary_decompressed_size == 0
1223    {
1224        return invalid("IndexRoot", "required dictionary field is zero");
1225    }
1226    validate_encrypted_extent(
1227        "IndexRoot.dictionary",
1228        header.dictionary_data_block_count,
1229        header.dictionary_encrypted_size,
1230        limits.block_size,
1231    )?;
1232    validate_fec_class_extent(
1233        "IndexRoot.dictionary",
1234        header.dictionary_data_block_count,
1235        header.dictionary_parity_block_count,
1236        limits.max_index_root_data_shards,
1237        limits.max_index_root_parity_shards,
1238    )
1239}
1240
1241fn index_root_dictionary_fields_are_zero(header: &IndexRootHeader) -> bool {
1242    header.dictionary_first_block == 0
1243        && header.dictionary_data_block_count == 0
1244        && header.dictionary_parity_block_count == 0
1245        && header.dictionary_encrypted_size == 0
1246        && header.dictionary_decompressed_size == 0
1247}
1248
1249fn validate_index_shard_tables(
1250    files: &[FileEntry],
1251    frames: &[FrameEntry],
1252    envelopes: &[EnvelopeEntry],
1253    string_pool: &[u8],
1254    locating_shard: &ShardEntry,
1255    limits: MetadataLimits,
1256) -> Result<(Vec<Vec<u8>>, Vec<u64>), FormatError> {
1257    validate_frame_table(frames)?;
1258    validate_envelope_table(envelopes, limits)?;
1259
1260    let frame_by_index = frames
1261        .iter()
1262        .enumerate()
1263        .map(|(idx, frame)| (frame.frame_index, idx))
1264        .collect::<HashMap<_, _>>();
1265    let envelope_by_index = envelopes
1266        .iter()
1267        .enumerate()
1268        .map(|(idx, envelope)| (envelope.envelope_index, idx))
1269        .collect::<HashMap<_, _>>();
1270
1271    let mut paths = Vec::with_capacity(files.len());
1272    let mut starts = Vec::with_capacity(files.len());
1273    let mut required_frames = BTreeSet::new();
1274
1275    for file in files {
1276        if file.flags != 0 {
1277            return invalid("FileEntry", "reserved flags are non-zero");
1278        }
1279        if file.path_length == 0 {
1280            return invalid("FileEntry", "path length is zero");
1281        }
1282        if file.path_length > limits.max_path_length {
1283            return invalid("FileEntry", "path length exceeds configured maximum");
1284        }
1285        if file.frame_count == 0 {
1286            return invalid("FileEntry", "frame count is zero");
1287        }
1288        if file.tar_member_group_size < 512 {
1289            return invalid(
1290                "FileEntry",
1291                "tar member group is smaller than one tar record",
1292            );
1293        }
1294        if file.path_hash < locating_shard.first_path_hash
1295            || file.path_hash > locating_shard.last_path_hash
1296        {
1297            return invalid(
1298                "FileEntry",
1299                "path hash is outside locating ShardEntry bounds",
1300            );
1301        }
1302
1303        let path = string_slice(
1304            string_pool,
1305            file.path_offset as u64,
1306            file.path_length as u64,
1307            "FileEntry",
1308        )?;
1309        validate_file_path_bytes(path, limits.max_path_length)?;
1310        if hash_prefix(path) != file.path_hash {
1311            return invalid("FileEntry", "path hash does not match string-pool path");
1312        }
1313
1314        let first_frame = frame_for_file(file, &frame_by_index, frames, file.first_frame_index)?;
1315        let tar_member_group_start = first_frame
1316            .tar_stream_offset
1317            .checked_add(file.offset_in_first_frame_plaintext as u64)
1318            .ok_or(FormatError::MetadataArithmeticOverflow {
1319                structure: "FileEntry",
1320            })?;
1321        validate_file_frame_range(file, frames, &frame_by_index)?;
1322        for offset in 0..file.frame_count as u64 {
1323            let index = file.first_frame_index.checked_add(offset).ok_or(
1324                FormatError::MetadataArithmeticOverflow {
1325                    structure: "FileEntry",
1326                },
1327            )?;
1328            required_frames.insert(index);
1329        }
1330        paths.push(path.to_vec());
1331        starts.push(tar_member_group_start);
1332    }
1333
1334    validate_file_order(files, &paths, &starts)?;
1335    if required_frames.len() != frames.len()
1336        || frames
1337            .iter()
1338            .any(|frame| !required_frames.contains(&frame.frame_index))
1339    {
1340        return invalid(
1341            "IndexShard",
1342            "FrameEntry table is not the exact set referenced by FileEntry rows",
1343        );
1344    }
1345
1346    let mut required_envelopes = BTreeSet::new();
1347    for frame in frames {
1348        let envelope = envelope_by_index
1349            .get(&frame.envelope_index)
1350            .and_then(|idx| envelopes.get(*idx))
1351            .ok_or_else(|| FormatError::InvalidMetadata {
1352                structure: "FrameEntry",
1353                reason: "referenced EnvelopeEntry is missing",
1354            })?;
1355        validate_frame_envelope_binding(frame, envelope)?;
1356        required_envelopes.insert(frame.envelope_index);
1357    }
1358    if required_envelopes.len() != envelopes.len()
1359        || envelopes
1360            .iter()
1361            .any(|entry| !required_envelopes.contains(&entry.envelope_index))
1362    {
1363        return invalid(
1364            "IndexShard",
1365            "EnvelopeEntry table is not the exact set referenced by FrameEntry rows",
1366        );
1367    }
1368    validate_frame_slices_by_envelope(frames, envelopes)?;
1369
1370    if let Some(first) = files.first() {
1371        if first.path_hash != locating_shard.first_path_hash {
1372            return invalid(
1373                "IndexShard",
1374                "first FileEntry hash does not match ShardEntry",
1375            );
1376        }
1377    }
1378    if let Some(last) = files.last() {
1379        if last.path_hash != locating_shard.last_path_hash {
1380            return invalid(
1381                "IndexShard",
1382                "last FileEntry hash does not match ShardEntry",
1383            );
1384        }
1385    }
1386
1387    Ok((paths, starts))
1388}
1389
1390fn validate_frame_table(frames: &[FrameEntry]) -> Result<(), FormatError> {
1391    for frame in frames {
1392        if frame.compressed_size == 0 || frame.decompressed_size == 0 {
1393            return invalid("FrameEntry", "frame sizes must be non-zero");
1394        }
1395        if frame.flags & !FRAME_KNOWN_FLAGS != 0 {
1396            return invalid("FrameEntry", "reserved flag bits are non-zero");
1397        }
1398    }
1399    for pair in frames.windows(2) {
1400        let previous = &pair[0];
1401        let next = &pair[1];
1402        if previous.frame_index >= next.frame_index {
1403            return invalid("IndexShard", "FrameEntry rows are not sorted and unique");
1404        }
1405        let previous_end = previous
1406            .tar_stream_offset
1407            .checked_add(previous.decompressed_size as u64)
1408            .ok_or(FormatError::MetadataArithmeticOverflow {
1409                structure: "FrameEntry",
1410            })?;
1411        if next.frame_index == previous.frame_index + 1 {
1412            if next.tar_stream_offset != previous_end {
1413                return invalid(
1414                    "FrameEntry",
1415                    "consecutive tar stream offsets are not packed",
1416                );
1417            }
1418        } else if next.tar_stream_offset <= previous_end {
1419            return invalid("FrameEntry", "non-consecutive tar stream offsets overlap");
1420        }
1421    }
1422    Ok(())
1423}
1424
1425fn validate_envelope_table(
1426    envelopes: &[EnvelopeEntry],
1427    limits: MetadataLimits,
1428) -> Result<(), FormatError> {
1429    for envelope in envelopes {
1430        if envelope.frame_count == 0 || envelope.plaintext_size == 0 {
1431            return invalid("EnvelopeEntry", "payload envelope has no frame plaintext");
1432        }
1433        validate_encrypted_extent(
1434            "EnvelopeEntry",
1435            envelope.data_block_count,
1436            envelope.encrypted_size,
1437            limits.block_size,
1438        )?;
1439        validate_fec_class_extent(
1440            "EnvelopeEntry",
1441            envelope.data_block_count,
1442            envelope.parity_block_count,
1443            limits.max_payload_data_shards,
1444            limits.max_payload_parity_shards,
1445        )?;
1446    }
1447    for pair in envelopes.windows(2) {
1448        if pair[0].envelope_index >= pair[1].envelope_index {
1449            return invalid("IndexShard", "EnvelopeEntry rows are not sorted and unique");
1450        }
1451    }
1452    Ok(())
1453}
1454
1455fn validate_file_order(
1456    files: &[FileEntry],
1457    paths: &[Vec<u8>],
1458    starts: &[u64],
1459) -> Result<(), FormatError> {
1460    for idx in 1..files.len() {
1461        let previous_key = (
1462            &files[idx - 1].path_hash,
1463            paths[idx - 1].as_slice(),
1464            starts[idx - 1],
1465        );
1466        let current_key = (&files[idx].path_hash, paths[idx].as_slice(), starts[idx]);
1467        if previous_key >= current_key {
1468            return invalid("IndexShard", "FileEntry rows are not sorted and unique");
1469        }
1470    }
1471    Ok(())
1472}
1473
1474fn validate_file_frame_range(
1475    file: &FileEntry,
1476    frames: &[FrameEntry],
1477    frame_by_index: &HashMap<u64, usize>,
1478) -> Result<(), FormatError> {
1479    let first = frame_for_file(file, frame_by_index, frames, file.first_frame_index)?;
1480    if file.offset_in_first_frame_plaintext >= first.decompressed_size {
1481        return invalid(
1482            "FileEntry",
1483            "offset in first frame is outside the first referenced frame",
1484        );
1485    }
1486
1487    let mut bytes_before_last =
1488        first.decompressed_size as u64 - file.offset_in_first_frame_plaintext as u64;
1489    if file.frame_count == 1 {
1490        if file.tar_member_group_size > bytes_before_last {
1491            return invalid(
1492                "FileEntry",
1493                "tar member group exceeds the single referenced frame",
1494            );
1495        }
1496        return Ok(());
1497    }
1498
1499    for offset in 1..(file.frame_count as u64 - 1) {
1500        let frame_index = file.first_frame_index.checked_add(offset).ok_or(
1501            FormatError::MetadataArithmeticOverflow {
1502                structure: "FileEntry",
1503            },
1504        )?;
1505        let frame = frame_for_file(file, frame_by_index, frames, frame_index)?;
1506        bytes_before_last = bytes_before_last
1507            .checked_add(frame.decompressed_size as u64)
1508            .ok_or(FormatError::MetadataArithmeticOverflow {
1509                structure: "FileEntry",
1510            })?;
1511    }
1512
1513    let last_index = file
1514        .first_frame_index
1515        .checked_add(file.frame_count as u64 - 1)
1516        .ok_or(FormatError::MetadataArithmeticOverflow {
1517            structure: "FileEntry",
1518        })?;
1519    let last = frame_for_file(file, frame_by_index, frames, last_index)?;
1520    let max_size = bytes_before_last
1521        .checked_add(last.decompressed_size as u64)
1522        .ok_or(FormatError::MetadataArithmeticOverflow {
1523            structure: "FileEntry",
1524        })?;
1525    if file.tar_member_group_size <= bytes_before_last || file.tar_member_group_size > max_size {
1526        return invalid("FileEntry", "frame range is not minimal");
1527    }
1528    Ok(())
1529}
1530
1531fn validate_frame_envelope_binding(
1532    frame: &FrameEntry,
1533    envelope: &EnvelopeEntry,
1534) -> Result<(), FormatError> {
1535    let envelope_frame_end = envelope
1536        .first_frame_index
1537        .checked_add(envelope.frame_count as u64)
1538        .ok_or(FormatError::MetadataArithmeticOverflow {
1539            structure: "EnvelopeEntry",
1540        })?;
1541    if frame.frame_index < envelope.first_frame_index || frame.frame_index >= envelope_frame_end {
1542        return invalid("FrameEntry", "frame index is outside envelope frame range");
1543    }
1544    let end = frame
1545        .offset_in_envelope
1546        .checked_add(frame.compressed_size)
1547        .ok_or(FormatError::MetadataArithmeticOverflow {
1548            structure: "FrameEntry",
1549        })?;
1550    if end > envelope.plaintext_size {
1551        return invalid("FrameEntry", "frame slice exceeds envelope plaintext");
1552    }
1553    Ok(())
1554}
1555
1556fn validate_frame_slices_by_envelope(
1557    frames: &[FrameEntry],
1558    envelopes: &[EnvelopeEntry],
1559) -> Result<(), FormatError> {
1560    for envelope in envelopes {
1561        let mut slices = frames
1562            .iter()
1563            .filter(|frame| frame.envelope_index == envelope.envelope_index)
1564            .map(|frame| {
1565                let end = frame
1566                    .offset_in_envelope
1567                    .checked_add(frame.compressed_size)
1568                    .ok_or(FormatError::MetadataArithmeticOverflow {
1569                        structure: "FrameEntry",
1570                    })?;
1571                Ok((frame.offset_in_envelope, end, frame.frame_index))
1572            })
1573            .collect::<Result<Vec<_>, FormatError>>()?;
1574        slices.sort_unstable_by_key(|slice| (slice.0, slice.2));
1575        for pair in slices.windows(2) {
1576            if pair[0].1 > pair[1].0 {
1577                return invalid("FrameEntry", "frame slices overlap inside an envelope");
1578            }
1579        }
1580
1581        let contains_complete_global_range = (0..envelope.frame_count as u64).all(|offset| {
1582            envelope
1583                .first_frame_index
1584                .checked_add(offset)
1585                .map(|index| slices.iter().any(|slice| slice.2 == index))
1586                .unwrap_or(false)
1587        });
1588        if contains_complete_global_range {
1589            let mut cursor = 0u32;
1590            for (start, end, _) in slices {
1591                if start != cursor {
1592                    return invalid("EnvelopeEntry", "complete local envelope has frame gap");
1593                }
1594                cursor = end;
1595            }
1596            if cursor != envelope.plaintext_size {
1597                return invalid(
1598                    "EnvelopeEntry",
1599                    "complete local envelope does not cover plaintext",
1600                );
1601            }
1602        }
1603    }
1604    Ok(())
1605}
1606
1607fn validate_directory_hint_entries(
1608    entries: &[DirectoryHintEntry],
1609    bytes: &[u8],
1610    header: &DirectoryHintTableHeader,
1611    locating_shard: &DirectoryHintShardEntry,
1612    index_root_shard_count: u32,
1613) -> Result<usize, FormatError> {
1614    let structure = "DirectoryHintTable";
1615    if index_root_shard_count == 0 {
1616        return invalid(structure, "directory hints require IndexRoot shard rows");
1617    }
1618    if entries.is_empty() {
1619        return invalid(structure, "located directory hint table is empty");
1620    }
1621    if entries[0].dir_hash != locating_shard.first_dir_hash {
1622        return invalid(
1623            structure,
1624            "first DirectoryHintEntry hash does not match locating row",
1625        );
1626    }
1627    if entries[entries.len() - 1].dir_hash != locating_shard.last_dir_hash {
1628        return invalid(
1629            structure,
1630            "last DirectoryHintEntry hash does not match locating row",
1631        );
1632    }
1633
1634    let mut max_shard_list_end = 0usize;
1635    for entry in entries {
1636        if entry.shard_count == 0 {
1637            return invalid("DirectoryHintEntry", "shard count is zero");
1638        }
1639        let start = entry.shard_list_start_index as usize;
1640        let end = start.checked_add(entry.shard_count as usize).ok_or(
1641            FormatError::MetadataArithmeticOverflow {
1642                structure: "DirectoryHintEntry",
1643            },
1644        )?;
1645        max_shard_list_end = max_shard_list_end.max(end);
1646    }
1647    let byte_len = checked_mul(max_shard_list_end, 4, structure)?;
1648    let shard_list_offset = to_usize(header.shard_list_offset, structure)?;
1649    let shard_list_end = checked_add(shard_list_offset, byte_len, structure)?;
1650    if shard_list_end > bytes.len() {
1651        return invalid(structure, "shard list exceeds plaintext");
1652    }
1653    Ok(max_shard_list_end)
1654}
1655
1656fn validate_directory_hint_paths_and_lists(
1657    entries: &[DirectoryHintEntry],
1658    shard_row_indexes: &[u32],
1659    string_pool: &[u8],
1660    locating_shard: &DirectoryHintShardEntry,
1661    index_root_shard_count: u32,
1662    max_path_length: u32,
1663) -> Result<Vec<Vec<u8>>, FormatError> {
1664    let mut paths = Vec::with_capacity(entries.len());
1665    let mut seen_paths = HashSet::new();
1666    for entry in entries {
1667        let path = if entry.path_length == 0 {
1668            if entry.path_offset != 0 || entry.dir_hash != hash_prefix(b"") {
1669                return invalid(
1670                    "DirectoryHintEntry",
1671                    "root directory entry is not canonical",
1672                );
1673            }
1674            &[][..]
1675        } else {
1676            let path = string_slice(
1677                string_pool,
1678                entry.path_offset,
1679                entry.path_length as u64,
1680                "DirectoryHintEntry",
1681            )?;
1682            validate_directory_path_bytes(path, max_path_length)?;
1683            path
1684        };
1685        if hash_prefix(path) != entry.dir_hash {
1686            return invalid(
1687                "DirectoryHintEntry",
1688                "dir_hash does not match string-pool path",
1689            );
1690        }
1691        if !seen_paths.insert(path.to_vec()) {
1692            return invalid("DirectoryHintEntry", "duplicate directory path");
1693        }
1694
1695        let start = entry.shard_list_start_index as usize;
1696        let end = start.checked_add(entry.shard_count as usize).ok_or(
1697            FormatError::MetadataArithmeticOverflow {
1698                structure: "DirectoryHintEntry",
1699            },
1700        )?;
1701        let rows = shard_row_indexes
1702            .get(start..end)
1703            .ok_or(FormatError::InvalidMetadata {
1704                structure: "DirectoryHintEntry",
1705                reason: "shard-row-index range is out of bounds",
1706            })?;
1707        for pair in rows.windows(2) {
1708            if pair[0] >= pair[1] {
1709                return invalid(
1710                    "DirectoryHintEntry",
1711                    "shard-row-index list is not sorted and unique",
1712                );
1713            }
1714        }
1715        if rows.iter().any(|row| *row >= index_root_shard_count) {
1716            return invalid(
1717                "DirectoryHintEntry",
1718                "shard-row-index is outside IndexRoot shard table",
1719            );
1720        }
1721        paths.push(path.to_vec());
1722    }
1723
1724    for idx in 1..entries.len() {
1725        let previous_key = (&entries[idx - 1].dir_hash, paths[idx - 1].as_slice());
1726        let current_key = (&entries[idx].dir_hash, paths[idx].as_slice());
1727        if previous_key >= current_key {
1728            return invalid(
1729                "DirectoryHintTable",
1730                "DirectoryHintEntry rows are not sorted and unique",
1731            );
1732        }
1733    }
1734    if entries[0].dir_hash != locating_shard.first_dir_hash
1735        || entries[entries.len() - 1].dir_hash != locating_shard.last_dir_hash
1736    {
1737        return invalid(
1738            "DirectoryHintTable",
1739            "entry hash bounds do not match locating shard",
1740        );
1741    }
1742
1743    Ok(paths)
1744}
1745
1746fn candidate_interval_indexes<T>(
1747    entries: &[T],
1748    target_hash: [u8; 8],
1749    scan_cap_per_direction: usize,
1750    first_hash: impl Fn(&T) -> [u8; 8],
1751    last_hash: impl Fn(&T) -> [u8; 8],
1752) -> Result<Vec<usize>, FormatError> {
1753    if entries.is_empty() {
1754        return Ok(Vec::new());
1755    }
1756    let upper = entries.partition_point(|entry| first_hash(entry) <= target_hash);
1757    if upper == 0 {
1758        return Ok(Vec::new());
1759    }
1760    let landing = upper - 1;
1761    if last_hash(&entries[landing]) < target_hash {
1762        return Ok(Vec::new());
1763    }
1764
1765    let mut start = landing;
1766    let mut left_scanned = 0usize;
1767    while start > 0
1768        && first_hash(&entries[start - 1]) <= target_hash
1769        && last_hash(&entries[start - 1]) >= target_hash
1770    {
1771        left_scanned += 1;
1772        if left_scanned > scan_cap_per_direction {
1773            return Err(FormatError::HashPrefixCollisionRunExceeded);
1774        }
1775        start -= 1;
1776    }
1777
1778    let mut end = landing + 1;
1779    let mut right_scanned = 0usize;
1780    while end < entries.len()
1781        && first_hash(&entries[end]) <= target_hash
1782        && last_hash(&entries[end]) >= target_hash
1783    {
1784        right_scanned += 1;
1785        if right_scanned > scan_cap_per_direction {
1786            return Err(FormatError::HashPrefixCollisionRunExceeded);
1787        }
1788        end += 1;
1789    }
1790
1791    Ok((start..end).collect())
1792}
1793
1794pub fn validate_file_path_bytes(path: &[u8], max_path_length: u32) -> Result<(), FormatError> {
1795    if path.is_empty() || path.len() > max_path_length as usize {
1796        return Err(FormatError::UnsafeArchivePath);
1797    }
1798    validate_relative_path(path, false)
1799}
1800
1801pub fn validate_directory_path_bytes(path: &[u8], max_path_length: u32) -> Result<(), FormatError> {
1802    if path.len() > max_path_length as usize {
1803        return Err(FormatError::UnsafeArchivePath);
1804    }
1805    validate_relative_path(path, true)
1806}
1807
1808fn validate_relative_path(path: &[u8], allow_empty_root: bool) -> Result<(), FormatError> {
1809    if path.is_empty() {
1810        return if allow_empty_root {
1811            Ok(())
1812        } else {
1813            Err(FormatError::UnsafeArchivePath)
1814        };
1815    }
1816    if path.contains(&0) || path.contains(&b'\\') || path.contains(&b':') || path[0] == b'/' {
1817        return Err(FormatError::UnsafeArchivePath);
1818    }
1819    let path_str = std::str::from_utf8(path).map_err(|_| FormatError::UnsafeArchivePath)?;
1820    if !path_str.nfc().eq(path_str.chars()) {
1821        return Err(FormatError::UnsafeArchivePath);
1822    }
1823    for component in path_str.split('/') {
1824        if component.is_empty() || component == "." || component == ".." {
1825            return Err(FormatError::UnsafeArchivePath);
1826        }
1827        if is_windows_device_component(component) {
1828            return Err(FormatError::UnsafeArchivePath);
1829        }
1830    }
1831    Ok(())
1832}
1833
/// Reports whether a path component names a Windows device.
///
/// Only the text before the first `.` is considered, with trailing spaces and dots removed,
/// and the comparison ignores ASCII case. The matched names are `CON`, `PRN`, `AUX`, `NUL`,
/// `CLOCK$`, and `COM`/`LPT` followed by a digit `1`-`9` or a superscript `¹`, `²`, `³`.
fn is_windows_device_component(component: &str) -> bool {
    const DEVICE_STEMS: &[&str] = &[
        "CON",
        "PRN",
        "AUX",
        "NUL",
        "CLOCK$",
        "COM1",
        "COM2",
        "COM3",
        "COM4",
        "COM5",
        "COM6",
        "COM7",
        "COM8",
        "COM9",
        "COM\u{00b9}",
        "COM\u{00b2}",
        "COM\u{00b3}",
        "LPT1",
        "LPT2",
        "LPT3",
        "LPT4",
        "LPT5",
        "LPT6",
        "LPT7",
        "LPT8",
        "LPT9",
        "LPT\u{00b9}",
        "LPT\u{00b2}",
        "LPT\u{00b3}",
    ];

    let before_first_dot = match component.find('.') {
        Some(dot) => &component[..dot],
        None => component,
    };
    let stem = before_first_dot.trim_end_matches(&[' ', '.'][..]);
    DEVICE_STEMS
        .iter()
        .any(|device| stem.eq_ignore_ascii_case(device))
}
1874
1875fn validate_encrypted_extent(
1876    structure: &'static str,
1877    data_block_count: u32,
1878    encrypted_size: u32,
1879    block_size: u32,
1880) -> Result<(), FormatError> {
1881    if data_block_count == 0 || encrypted_size == 0 {
1882        return invalid(structure, "encrypted object has zero data blocks or size");
1883    }
1884    let expected = (data_block_count as u64)
1885        .checked_mul(block_size as u64)
1886        .ok_or(FormatError::MetadataArithmeticOverflow { structure })?;
1887    if expected > u32::MAX as u64 || expected != encrypted_size as u64 {
1888        return invalid(
1889            structure,
1890            "encrypted_size is not data_block_count * block_size",
1891        );
1892    }
1893    Ok(())
1894}
1895
1896fn validate_fec_class_extent(
1897    structure: &'static str,
1898    data_block_count: u32,
1899    parity_block_count: u32,
1900    data_shard_max: u16,
1901    parity_shard_max: u16,
1902) -> Result<(), FormatError> {
1903    if data_block_count > data_shard_max as u32 {
1904        return invalid(structure, "data_block_count exceeds class maximum");
1905    }
1906    if parity_block_count > parity_shard_max as u32 {
1907        return invalid(structure, "parity_block_count exceeds class maximum");
1908    }
1909    let total = data_block_count as u64 + parity_block_count as u64;
1910    if total > REED_SOLOMON_GF16_MAX_TOTAL_SHARDS {
1911        return invalid(
1912            structure,
1913            "data_block_count + parity_block_count exceeds ReedSolomonGF16 limit",
1914        );
1915    }
1916    Ok(())
1917}
1918
1919fn frame_for_file<'a>(
1920    _file: &FileEntry,
1921    frame_by_index: &HashMap<u64, usize>,
1922    frames: &'a [FrameEntry],
1923    frame_index: u64,
1924) -> Result<&'a FrameEntry, FormatError> {
1925    frame_by_index
1926        .get(&frame_index)
1927        .and_then(|idx| frames.get(*idx))
1928        .ok_or(FormatError::InvalidMetadata {
1929            structure: "FileEntry",
1930            reason: "referenced FrameEntry is missing",
1931        })
1932}
1933
1934fn parse_counted_table<T>(
1935    bytes: &[u8],
1936    structure: &'static str,
1937    name: &'static str,
1938    count: u64,
1939    offset: u64,
1940    entry_len: usize,
1941    cursor: &mut usize,
1942    parse: fn(&[u8]) -> Result<T, FormatError>,
1943) -> Result<Vec<T>, FormatError> {
1944    if count == 0 {
1945        if offset != 0 {
1946            return invalid(structure, "absent counted table has non-zero offset");
1947        }
1948        return Ok(Vec::new());
1949    }
1950    expect_offset(structure, name, offset, *cursor)?;
1951    let count = to_usize(count, structure)?;
1952    let bytes_len = checked_mul(count, entry_len, structure)?;
1953    let table = slice(bytes, *cursor, bytes_len, structure)?;
1954    *cursor = checked_add(*cursor, bytes_len, structure)?;
1955    table.chunks_exact(entry_len).map(parse).collect()
1956}
1957
1958fn parse_u32_array(bytes: &[u8], structure: &'static str) -> Result<Vec<u32>, FormatError> {
1959    let mut out = Vec::with_capacity(bytes.len() / 4);
1960    for chunk in bytes.chunks_exact(4) {
1961        out.push(read_u32(chunk, 0, structure)?);
1962    }
1963    Ok(out)
1964}
1965
1966fn string_slice<'a>(
1967    string_pool: &'a [u8],
1968    offset: u64,
1969    length: u64,
1970    structure: &'static str,
1971) -> Result<&'a [u8], FormatError> {
1972    let start = to_usize(offset, structure)?;
1973    let len = to_usize(length, structure)?;
1974    slice(string_pool, start, len, structure)
1975}
1976
1977fn shard_entry_sort_key(entry: &ShardEntry) -> ([u8; 8], [u8; 8], u64) {
1978    (
1979        entry.first_path_hash,
1980        entry.last_path_hash,
1981        entry.shard_index,
1982    )
1983}
1984
1985fn directory_hint_shard_sort_key(entry: &DirectoryHintShardEntry) -> ([u8; 8], [u8; 8], u64) {
1986    (
1987        entry.first_dir_hash,
1988        entry.last_dir_hash,
1989        entry.hint_shard_index,
1990    )
1991}
1992
/// Returns the offset to record for a table: `0` when the table has no entries,
/// otherwise `cursor` converted to `u32` with an `as` cast.
fn table_offset(len: usize, cursor: usize) -> u32 {
    match len {
        0 => 0,
        _ => cursor as u32,
    }
}
2000
2001fn expect_magic(
2002    structure: &'static str,
2003    expected: [u8; 4],
2004    actual: [u8; 4],
2005) -> Result<(), FormatError> {
2006    if actual != expected {
2007        return Err(FormatError::BadMagic { structure });
2008    }
2009    Ok(())
2010}
2011
2012fn expect_zero(structure: &'static str, bytes: &[u8]) -> Result<(), FormatError> {
2013    if bytes.iter().any(|byte| *byte != 0) {
2014        return Err(FormatError::NonZeroReserved { structure });
2015    }
2016    Ok(())
2017}
2018
2019fn expect_offset(
2020    structure: &'static str,
2021    name: &'static str,
2022    actual: u64,
2023    expected: usize,
2024) -> Result<(), FormatError> {
2025    if actual != expected as u64 {
2026        return Err(FormatError::InvalidMetadata {
2027            structure,
2028            reason: name,
2029        });
2030    }
2031    Ok(())
2032}
2033
2034fn slice<'a>(
2035    bytes: &'a [u8],
2036    offset: usize,
2037    len: usize,
2038    structure: &'static str,
2039) -> Result<&'a [u8], FormatError> {
2040    let end = checked_add(offset, len, structure)?;
2041    bytes.get(offset..end).ok_or(FormatError::InvalidMetadata {
2042        structure,
2043        reason: "range is out of bounds",
2044    })
2045}
2046
2047fn read_array<const N: usize>(
2048    bytes: &[u8],
2049    offset: usize,
2050    structure: &'static str,
2051) -> Result<[u8; N], FormatError> {
2052    let mut out = [0u8; N];
2053    out.copy_from_slice(slice(bytes, offset, N, structure)?);
2054    Ok(out)
2055}
2056
2057fn read_u32(bytes: &[u8], offset: usize, structure: &'static str) -> Result<u32, FormatError> {
2058    let raw = read_array::<4>(bytes, offset, structure)?;
2059    Ok(u32::from_le_bytes(raw))
2060}
2061
2062fn read_u64(bytes: &[u8], offset: usize, structure: &'static str) -> Result<u64, FormatError> {
2063    let raw = read_array::<8>(bytes, offset, structure)?;
2064    Ok(u64::from_le_bytes(raw))
2065}
2066
/// Writes `value` as 4 little-endian bytes into `bytes` at `offset`.
///
/// # Panics
/// Panics when `bytes` does not have 4 bytes available at `offset`.
fn write_u32(bytes: &mut [u8], offset: usize, value: u32) {
    let encoded = value.to_le_bytes();
    let destination = &mut bytes[offset..][..4];
    destination.copy_from_slice(&encoded);
}
2070
/// Writes `value` as 8 little-endian bytes into `bytes` at `offset`.
///
/// # Panics
/// Panics when `bytes` does not have 8 bytes available at `offset`.
fn write_u64(bytes: &mut [u8], offset: usize, value: u64) {
    let encoded = value.to_le_bytes();
    let destination = &mut bytes[offset..][..8];
    destination.copy_from_slice(&encoded);
}
2074
2075fn checked_add(lhs: usize, rhs: usize, structure: &'static str) -> Result<usize, FormatError> {
2076    lhs.checked_add(rhs)
2077        .ok_or(FormatError::MetadataArithmeticOverflow { structure })
2078}
2079
2080fn checked_mul(lhs: usize, rhs: usize, structure: &'static str) -> Result<usize, FormatError> {
2081    lhs.checked_mul(rhs)
2082        .ok_or(FormatError::MetadataArithmeticOverflow { structure })
2083}
2084
2085fn to_usize(value: u64, structure: &'static str) -> Result<usize, FormatError> {
2086    usize::try_from(value).map_err(|_| FormatError::MetadataArithmeticOverflow { structure })
2087}
2088
2089fn invalid<T>(structure: &'static str, reason: &'static str) -> Result<T, FormatError> {
2090    Err(FormatError::InvalidMetadata { structure, reason })
2091}
2092
2093#[cfg(test)]
2094mod tests {
2095    use super::*;
2096
2097    #[test]
2098    fn default_reader_caps_match_v36() {
2099        let limits = MetadataLimits::default();
2100        assert_eq!(limits.max_shard_count, 1_000_000);
2101        assert_eq!(limits.max_directory_hint_shards, 1_000_000);
2102        assert_eq!(limits.max_files_per_index_shard, 1_000_000);
2103        assert_eq!(limits.max_entries_per_directory_hint_shard, 1_000_000);
2104        assert_eq!(limits.max_hash_collision_shard_scan, 16);
2105    }
2106
2107    #[test]
2108    fn index_root_rejects_shard_extent_above_crypto_header_class_limits() {
2109        let path_hash = hash_prefix(b"a.txt");
2110        let root = IndexRoot {
2111            header: IndexRootHeader {
2112                file_count: 1,
2113                ..IndexRootHeader::empty()
2114            },
2115            shards: vec![ShardEntry {
2116                shard_index: 0,
2117                first_block_index: 1,
2118                data_block_count: 1,
2119                parity_block_count: 2,
2120                encrypted_size: 4096,
2121                decompressed_size: 64,
2122                file_count: 1,
2123                first_path_hash: path_hash,
2124                last_path_hash: path_hash,
2125            }],
2126            directory_hint_shards: Vec::new(),
2127        };
2128        let mut limits = MetadataLimits::default();
2129        limits.max_index_parity_shards = 1;
2130
2131        assert_eq!(
2132            IndexRoot::parse(&root.to_bytes(), false, limits).unwrap_err(),
2133            FormatError::InvalidMetadata {
2134                structure: "ShardEntry",
2135                reason: "parity_block_count exceeds class maximum",
2136            }
2137        );
2138    }
2139
2140    #[test]
2141    fn metadata_fec_extent_rejects_reed_solomon_total_overflow() {
2142        assert_eq!(
2143            validate_fec_class_extent("EnvelopeEntry", 65_535, 1, u16::MAX, u16::MAX).unwrap_err(),
2144            FormatError::InvalidMetadata {
2145                structure: "EnvelopeEntry",
2146                reason: "data_block_count + parity_block_count exceeds ReedSolomonGF16 limit",
2147            }
2148        );
2149    }
2150
2151    #[test]
2152    fn parses_valid_empty_index_root() {
2153        let root = IndexRoot {
2154            header: IndexRootHeader::empty(),
2155            shards: Vec::new(),
2156            directory_hint_shards: Vec::new(),
2157        };
2158
2159        let bytes = root.to_bytes();
2160        let parsed = IndexRoot::parse(&bytes, false, MetadataLimits::default()).unwrap();
2161
2162        assert_eq!(parsed.header.file_count, 0);
2163        assert!(parsed.shards.is_empty());
2164        assert!(parsed.directory_hint_shards.is_empty());
2165    }
2166
2167    #[test]
2168    fn index_root_rejects_nonzero_offsets_for_absent_counted_tables() {
2169        let mut root = IndexRoot {
2170            header: IndexRootHeader::empty(),
2171            shards: Vec::new(),
2172            directory_hint_shards: Vec::new(),
2173        };
2174
2175        let mut bytes = root.to_bytes();
2176        write_u64(&mut bytes, 88, INDEX_ROOT_LEN as u64);
2177        assert_eq!(
2178            IndexRoot::parse(&bytes, false, MetadataLimits::default()).unwrap_err(),
2179            FormatError::InvalidMetadata {
2180                structure: "IndexRoot",
2181                reason: "absent shard table has non-zero offset",
2182            }
2183        );
2184
2185        root.header.file_count = 1;
2186        root.shards.push(ShardEntry {
2187            shard_index: 0,
2188            first_block_index: 1,
2189            data_block_count: 1,
2190            parity_block_count: 0,
2191            encrypted_size: 4096,
2192            decompressed_size: 128,
2193            file_count: 1,
2194            first_path_hash: hash_prefix(b"a.txt"),
2195            last_path_hash: hash_prefix(b"a.txt"),
2196        });
2197        let mut bytes = root.to_bytes();
2198        write_u64(&mut bytes, 96, (INDEX_ROOT_LEN + SHARD_ENTRY_LEN) as u64);
2199        assert_eq!(
2200            IndexRoot::parse(&bytes, false, MetadataLimits::default()).unwrap_err(),
2201            FormatError::InvalidMetadata {
2202                structure: "IndexRoot",
2203                reason: "absent directory hint shard table has non-zero offset",
2204            }
2205        );
2206    }
2207
2208    #[test]
2209    fn index_root_rejects_has_dictionary_with_zero_dictionary_fields() {
2210        let root = IndexRoot {
2211            header: IndexRootHeader::empty(),
2212            shards: Vec::new(),
2213            directory_hint_shards: Vec::new(),
2214        };
2215
2216        assert_eq!(
2217            IndexRoot::parse(&root.to_bytes(), true, MetadataLimits::default()).unwrap_err(),
2218            FormatError::InvalidMetadata {
2219                structure: "IndexRoot",
2220                reason: "dictionary data block count is zero while has_dictionary is true",
2221            }
2222        );
2223    }
2224
2225    #[test]
2226    fn index_root_rejects_empty_archive_with_dictionary_extent() {
2227        let root = IndexRoot {
2228            header: IndexRootHeader {
2229                dictionary_first_block: 1,
2230                dictionary_data_block_count: 1,
2231                dictionary_encrypted_size: 4096,
2232                dictionary_decompressed_size: 16,
2233                ..IndexRootHeader::empty()
2234            },
2235            shards: Vec::new(),
2236            directory_hint_shards: Vec::new(),
2237        };
2238
2239        assert_eq!(
2240            IndexRoot::parse(&root.to_bytes(), true, MetadataLimits::default()).unwrap_err(),
2241            FormatError::InvalidMetadata {
2242                structure: "IndexRoot",
2243                reason: "empty archive cannot use dictionary",
2244            }
2245        );
2246    }
2247
2248    #[test]
2249    fn encrypted_object_extents_reject_zero_data_or_size_for_all_metadata_rows() {
2250        assert_eq!(
2251            validate_encrypted_extent("ManifestFooter.IndexRoot", 0, 4096, 4096).unwrap_err(),
2252            FormatError::InvalidMetadata {
2253                structure: "ManifestFooter.IndexRoot",
2254                reason: "encrypted object has zero data blocks or size",
2255            }
2256        );
2257        assert_eq!(
2258            validate_encrypted_extent("EnvelopeEntry", 1, 0, 4096).unwrap_err(),
2259            FormatError::InvalidMetadata {
2260                structure: "EnvelopeEntry",
2261                reason: "encrypted object has zero data blocks or size",
2262            }
2263        );
2264
2265        let path_hash = hash_prefix(b"a.txt");
2266        let mut root = IndexRoot {
2267            header: IndexRootHeader {
2268                file_count: 1,
2269                ..IndexRootHeader::empty()
2270            },
2271            shards: vec![ShardEntry {
2272                shard_index: 0,
2273                first_block_index: 1,
2274                data_block_count: 0,
2275                parity_block_count: 0,
2276                encrypted_size: 4096,
2277                decompressed_size: 128,
2278                file_count: 1,
2279                first_path_hash: path_hash,
2280                last_path_hash: path_hash,
2281            }],
2282            directory_hint_shards: Vec::new(),
2283        };
2284        assert_eq!(
2285            IndexRoot::parse(&root.to_bytes(), false, MetadataLimits::default()).unwrap_err(),
2286            FormatError::InvalidMetadata {
2287                structure: "ShardEntry",
2288                reason: "encrypted object has zero data blocks or size",
2289            }
2290        );
2291
2292        root.shards[0].data_block_count = 1;
2293        root.shards[0].encrypted_size = 4096;
2294        root.directory_hint_shards.push(DirectoryHintShardEntry {
2295            hint_shard_index: 0,
2296            first_dir_hash: hash_prefix(b""),
2297            last_dir_hash: hash_prefix(b""),
2298            first_block_index: 2,
2299            data_block_count: 1,
2300            parity_block_count: 0,
2301            encrypted_size: 0,
2302            decompressed_size: 72,
2303            entry_count: 1,
2304        });
2305        assert_eq!(
2306            IndexRoot::parse(&root.to_bytes(), false, MetadataLimits::default()).unwrap_err(),
2307            FormatError::InvalidMetadata {
2308                structure: "DirectoryHintShardEntry",
2309                reason: "encrypted object has zero data blocks or size",
2310            }
2311        );
2312
2313        let mut dict_root = IndexRoot {
2314            header: IndexRootHeader {
2315                file_count: 1,
2316                dictionary_first_block: 10,
2317                dictionary_data_block_count: 0,
2318                dictionary_parity_block_count: 0,
2319                dictionary_encrypted_size: 4096,
2320                dictionary_decompressed_size: 32,
2321                ..IndexRootHeader::empty()
2322            },
2323            shards: vec![ShardEntry {
2324                shard_index: 0,
2325                first_block_index: 1,
2326                data_block_count: 1,
2327                parity_block_count: 0,
2328                encrypted_size: 4096,
2329                decompressed_size: 128,
2330                file_count: 1,
2331                first_path_hash: path_hash,
2332                last_path_hash: path_hash,
2333            }],
2334            directory_hint_shards: Vec::new(),
2335        };
2336        assert_eq!(
2337            IndexRoot::parse(&dict_root.to_bytes(), true, MetadataLimits::default()).unwrap_err(),
2338            FormatError::InvalidMetadata {
2339                structure: "IndexRoot",
2340                reason: "dictionary data block count is zero while has_dictionary is true",
2341            }
2342        );
2343        dict_root.header.dictionary_data_block_count = 1;
2344        dict_root.header.dictionary_encrypted_size = 0;
2345        assert_eq!(
2346            IndexRoot::parse(&dict_root.to_bytes(), true, MetadataLimits::default()).unwrap_err(),
2347            FormatError::InvalidMetadata {
2348                structure: "IndexRoot",
2349                reason: "required dictionary field is zero",
2350            }
2351        );
2352    }
2353
2354    #[test]
2355    fn index_root_rejects_dictionary_fields_when_crypto_header_has_no_dictionary() {
2356        let mut root = IndexRoot {
2357            header: IndexRootHeader::empty(),
2358            shards: Vec::new(),
2359            directory_hint_shards: Vec::new(),
2360        };
2361        root.header.dictionary_first_block = 1;
2362        root.header.dictionary_data_block_count = 1;
2363        root.header.dictionary_encrypted_size = 4096;
2364        root.header.dictionary_decompressed_size = 16;
2365
2366        assert_eq!(
2367            IndexRoot::parse(&root.to_bytes(), false, MetadataLimits::default()).unwrap_err(),
2368            FormatError::InvalidMetadata {
2369                structure: "IndexRoot",
2370                reason: "dictionary fields are non-zero while has_dictionary is false",
2371            }
2372        );
2373    }
2374
2375    #[test]
2376    fn rejects_directory_hint_rows_sorted_by_old_v36_key_only() {
2377        let h = [0x10; 8];
2378        let z = [0x20; 8];
2379        let root = IndexRoot {
2380            header: IndexRootHeader {
2381                file_count: 1,
2382                ..IndexRootHeader::empty()
2383            },
2384            shards: vec![ShardEntry {
2385                shard_index: 0,
2386                first_block_index: 0,
2387                data_block_count: 1,
2388                parity_block_count: 1,
2389                encrypted_size: 4096,
2390                decompressed_size: 64,
2391                file_count: 1,
2392                first_path_hash: h,
2393                last_path_hash: z,
2394            }],
2395            directory_hint_shards: vec![
2396                DirectoryHintShardEntry {
2397                    hint_shard_index: 0,
2398                    first_dir_hash: h,
2399                    last_dir_hash: z,
2400                    first_block_index: 10,
2401                    data_block_count: 1,
2402                    parity_block_count: 1,
2403                    encrypted_size: 4096,
2404                    decompressed_size: 72,
2405                    entry_count: 1,
2406                },
2407                DirectoryHintShardEntry {
2408                    hint_shard_index: 1,
2409                    first_dir_hash: h,
2410                    last_dir_hash: h,
2411                    first_block_index: 12,
2412                    data_block_count: 1,
2413                    parity_block_count: 1,
2414                    encrypted_size: 4096,
2415                    decompressed_size: 72,
2416                    entry_count: 1,
2417                },
2418            ],
2419        };
2420
2421        assert_eq!(
2422            IndexRoot::parse(&root.to_bytes(), false, MetadataLimits::default()).unwrap_err(),
2423            FormatError::InvalidMetadata {
2424                structure: "IndexRoot",
2425                reason: "DirectoryHintShardEntry rows are not sorted"
2426            }
2427        );
2428    }
2429
2430    #[test]
2431    fn directory_hint_shard_count_cap_is_independent_from_index_shard_cap() {
2432        let path_hash = hash_prefix(b"a.txt");
2433        let dir_hash = hash_prefix(b"");
2434        let root = IndexRoot {
2435            header: IndexRootHeader {
2436                file_count: 1,
2437                ..IndexRootHeader::empty()
2438            },
2439            shards: vec![ShardEntry {
2440                shard_index: 0,
2441                first_block_index: 1,
2442                data_block_count: 1,
2443                parity_block_count: 0,
2444                encrypted_size: 4096,
2445                decompressed_size: 128,
2446                file_count: 1,
2447                first_path_hash: path_hash,
2448                last_path_hash: path_hash,
2449            }],
2450            directory_hint_shards: vec![DirectoryHintShardEntry {
2451                hint_shard_index: 0,
2452                first_dir_hash: dir_hash,
2453                last_dir_hash: dir_hash,
2454                first_block_index: 2,
2455                data_block_count: 1,
2456                parity_block_count: 0,
2457                encrypted_size: 4096,
2458                decompressed_size: 72,
2459                entry_count: 1,
2460            }],
2461        };
2462        let mut limits = MetadataLimits::default();
2463        limits.max_shard_count = 1;
2464        limits.max_directory_hint_shards = 1;
2465        IndexRoot::parse(&root.to_bytes(), false, limits).unwrap();
2466
2467        limits.max_directory_hint_shards = 0;
2468        assert_eq!(
2469            IndexRoot::parse(&root.to_bytes(), false, limits).unwrap_err(),
2470            FormatError::InvalidMetadata {
2471                structure: "IndexRoot",
2472                reason: "directory hint shard count exceeds resource cap",
2473            }
2474        );
2475    }
2476
2477    #[test]
2478    fn directory_hint_paths_obey_configured_max_path_length() {
2479        let path = b"toolong".to_vec();
2480        let table = DirectoryHintTable {
2481            header: DirectoryHintTableHeader {
2482                version: 1,
2483                hint_shard_index: 0,
2484                entry_count: 0,
2485                entry_table_offset: 0,
2486                shard_list_offset: 0,
2487                string_pool_offset: 0,
2488                string_pool_size: 0,
2489            },
2490            entries: vec![DirectoryHintEntry {
2491                dir_hash: hash_prefix(&path),
2492                path_offset: 0,
2493                path_length: path.len() as u32,
2494                shard_list_start_index: 0,
2495                shard_count: 1,
2496            }],
2497            shard_row_indexes: vec![0],
2498            string_pool: path.clone(),
2499            entry_paths: vec![path.clone()],
2500        };
2501        let bytes = table.to_bytes();
2502        let locating = DirectoryHintShardEntry {
2503            hint_shard_index: 0,
2504            first_dir_hash: hash_prefix(&path),
2505            last_dir_hash: hash_prefix(&path),
2506            first_block_index: 0,
2507            data_block_count: 1,
2508            parity_block_count: 0,
2509            encrypted_size: 4096,
2510            decompressed_size: bytes.len() as u32,
2511            entry_count: 1,
2512        };
2513        let mut limits = MetadataLimits::default();
2514        limits.max_path_length = 3;
2515
2516        assert_eq!(
2517            DirectoryHintTable::parse(&bytes, &locating, 1, limits).unwrap_err(),
2518            FormatError::UnsafeArchivePath
2519        );
2520    }
2521
2522    #[test]
2523    fn directory_hint_table_rejects_wrong_hint_shard_identity() {
2524        let path = b"dir".to_vec();
2525        let table = DirectoryHintTable {
2526            header: DirectoryHintTableHeader {
2527                version: 1,
2528                hint_shard_index: 5,
2529                entry_count: 0,
2530                entry_table_offset: 0,
2531                shard_list_offset: 0,
2532                string_pool_offset: 0,
2533                string_pool_size: 0,
2534            },
2535            entries: vec![DirectoryHintEntry {
2536                dir_hash: hash_prefix(&path),
2537                path_offset: 0,
2538                path_length: path.len() as u32,
2539                shard_list_start_index: 0,
2540                shard_count: 1,
2541            }],
2542            shard_row_indexes: vec![0],
2543            string_pool: path.clone(),
2544            entry_paths: vec![path.clone()],
2545        };
2546        let bytes = table.to_bytes();
2547        let locating = DirectoryHintShardEntry {
2548            hint_shard_index: 6,
2549            first_dir_hash: hash_prefix(&path),
2550            last_dir_hash: hash_prefix(&path),
2551            first_block_index: 0,
2552            data_block_count: 1,
2553            parity_block_count: 0,
2554            encrypted_size: 4096,
2555            decompressed_size: bytes.len() as u32,
2556            entry_count: 1,
2557        };
2558
2559        assert_eq!(
2560            DirectoryHintTable::parse(&bytes, &locating, 1, MetadataLimits::default()).unwrap_err(),
2561            FormatError::InvalidMetadata {
2562                structure: "DirectoryHintTable",
2563                reason: "hint shard index does not match locating DirectoryHintShardEntry",
2564            }
2565        );
2566    }
2567
2568    #[test]
2569    fn directory_hint_table_rejects_empty_shard_lists() {
2570        let path = b"dir".to_vec();
2571        let table = DirectoryHintTable {
2572            header: DirectoryHintTableHeader {
2573                version: 1,
2574                hint_shard_index: 0,
2575                entry_count: 0,
2576                entry_table_offset: 0,
2577                shard_list_offset: 0,
2578                string_pool_offset: 0,
2579                string_pool_size: 0,
2580            },
2581            entries: vec![DirectoryHintEntry {
2582                dir_hash: hash_prefix(&path),
2583                path_offset: 0,
2584                path_length: path.len() as u32,
2585                shard_list_start_index: 0,
2586                shard_count: 0,
2587            }],
2588            shard_row_indexes: Vec::new(),
2589            string_pool: path.clone(),
2590            entry_paths: vec![path.clone()],
2591        };
2592        let bytes = table.to_bytes();
2593        let locating = DirectoryHintShardEntry {
2594            hint_shard_index: 0,
2595            first_dir_hash: hash_prefix(&path),
2596            last_dir_hash: hash_prefix(&path),
2597            first_block_index: 0,
2598            data_block_count: 1,
2599            parity_block_count: 0,
2600            encrypted_size: 4096,
2601            decompressed_size: bytes.len() as u32,
2602            entry_count: 1,
2603        };
2604
2605        assert_eq!(
2606            DirectoryHintTable::parse(&bytes, &locating, 1, MetadataLimits::default()).unwrap_err(),
2607            FormatError::InvalidMetadata {
2608                structure: "DirectoryHintEntry",
2609                reason: "shard count is zero",
2610            }
2611        );
2612    }
2613
2614    #[test]
2615    fn index_shard_rejects_unsupported_version_and_zero_count_pointer_offsets() {
2616        let path = b"file.txt";
2617        let path_hash = hash_prefix(path);
2618        let file = FileEntry {
2619            path_hash,
2620            path_offset: 0,
2621            path_length: path.len() as u32,
2622            first_frame_index: 0,
2623            frame_count: 1,
2624            offset_in_first_frame_plaintext: 0,
2625            tar_member_group_size: 512,
2626            file_data_size: 0,
2627            flags: 0,
2628        };
2629        let frame = FrameEntry {
2630            frame_index: 0,
2631            envelope_index: 0,
2632            offset_in_envelope: 0,
2633            compressed_size: 128,
2634            decompressed_size: 512,
2635            flags: 0,
2636            tar_stream_offset: 0,
2637        };
2638        let envelope = EnvelopeEntry {
2639            envelope_index: 0,
2640            first_block_index: 0,
2641            data_block_count: 1,
2642            parity_block_count: 0,
2643            encrypted_size: 4096,
2644            plaintext_size: 128,
2645            first_frame_index: 0,
2646            frame_count: 1,
2647        };
2648        let shard = IndexShard {
2649            header: IndexShardHeader {
2650                version: 1,
2651                shard_index: 7,
2652                file_count: 0,
2653                frame_count: 0,
2654                envelope_count: 0,
2655                file_table_offset: 0,
2656                frame_table_offset: 0,
2657                envelope_table_offset: 0,
2658                string_pool_offset: 0,
2659                string_pool_size: 0,
2660            },
2661            files: vec![file],
2662            frames: vec![frame],
2663            envelopes: vec![envelope],
2664            string_pool: path.to_vec(),
2665            file_paths: Vec::new(),
2666            file_tar_member_group_starts: Vec::new(),
2667        };
2668        let locating = ShardEntry {
2669            shard_index: 7,
2670            first_block_index: 10,
2671            data_block_count: 1,
2672            parity_block_count: 0,
2673            encrypted_size: 4096,
2674            decompressed_size: shard.to_bytes().len() as u32,
2675            file_count: 1,
2676            first_path_hash: path_hash,
2677            last_path_hash: path_hash,
2678        };
2679
2680        let mut unsupported_version = shard.to_bytes();
2681        write_u32(&mut unsupported_version, 4, 2);
2682        assert_eq!(
2683            IndexShard::parse(&unsupported_version, &locating, MetadataLimits::default())
2684                .unwrap_err(),
2685            FormatError::InvalidMetadata {
2686                structure: "IndexShard",
2687                reason: "unsupported version",
2688            }
2689        );
2690
2691        let mut nonzero_zero_frame_table = shard.to_bytes();
2692        write_u32(&mut nonzero_zero_frame_table, 20, 0);
2693        write_u32(
2694            &mut nonzero_zero_frame_table,
2695            32,
2696            INDEX_SHARD_HEADER_LEN as u32,
2697        );
2698        assert_eq!(
2699            IndexShard::parse(
2700                &nonzero_zero_frame_table,
2701                &locating,
2702                MetadataLimits::default()
2703            )
2704            .unwrap_err(),
2705            FormatError::InvalidMetadata {
2706                structure: "IndexShard",
2707                reason: "absent counted table has non-zero offset",
2708            }
2709        );
2710
2711        let mut nonzero_zero_envelope_table = shard.to_bytes();
2712        write_u32(&mut nonzero_zero_envelope_table, 24, 0);
2713        write_u32(
2714            &mut nonzero_zero_envelope_table,
2715            36,
2716            (INDEX_SHARD_HEADER_LEN + FILE_ENTRY_LEN + FRAME_ENTRY_LEN) as u32,
2717        );
2718        assert_eq!(
2719            IndexShard::parse(
2720                &nonzero_zero_envelope_table,
2721                &locating,
2722                MetadataLimits::default()
2723            )
2724            .unwrap_err(),
2725            FormatError::InvalidMetadata {
2726                structure: "IndexShard",
2727                reason: "absent counted table has non-zero offset",
2728            }
2729        );
2730    }
2731
2732    #[test]
2733    fn directory_hint_table_rejects_zero_count_nonzero_offsets() {
2734        let path = b"dir".to_vec();
2735        let table = DirectoryHintTable {
2736            header: DirectoryHintTableHeader {
2737                version: 1,
2738                hint_shard_index: 5,
2739                entry_count: 0,
2740                entry_table_offset: 0,
2741                shard_list_offset: 0,
2742                string_pool_offset: 0,
2743                string_pool_size: 0,
2744            },
2745            entries: vec![DirectoryHintEntry {
2746                dir_hash: hash_prefix(&path),
2747                path_offset: 0,
2748                path_length: path.len() as u32,
2749                shard_list_start_index: 0,
2750                shard_count: 1,
2751            }],
2752            shard_row_indexes: vec![0],
2753            string_pool: path.clone(),
2754            entry_paths: vec![path.clone()],
2755        };
2756        let locating = DirectoryHintShardEntry {
2757            hint_shard_index: 5,
2758            first_dir_hash: hash_prefix(&path),
2759            last_dir_hash: hash_prefix(&path),
2760            first_block_index: 0,
2761            data_block_count: 1,
2762            parity_block_count: 0,
2763            encrypted_size: 4096,
2764            decompressed_size: table.to_bytes().len() as u32,
2765            entry_count: 1,
2766        };
2767        let mut bytes = table.to_bytes();
2768        let bytes_len = bytes.len() as u64;
2769        write_u64(&mut bytes, 48, 0);
2770        write_u64(&mut bytes, 40, bytes_len);
2771
2772        assert_eq!(
2773            DirectoryHintTable::parse(&bytes, &locating, 1, MetadataLimits::default()).unwrap_err(),
2774            FormatError::InvalidMetadata {
2775                structure: "DirectoryHintTable",
2776                reason: "absent string pool has non-zero offset",
2777            }
2778        );
2779    }
2780
2781    #[test]
2782    fn index_shard_rejects_non_exact_local_frame_and_envelope_tables() {
2783        let path = b"exact-local.txt";
2784        let path_hash = hash_prefix(path);
2785        let file = FileEntry {
2786            path_hash,
2787            path_offset: 0,
2788            path_length: path.len() as u32,
2789            first_frame_index: 0,
2790            frame_count: 1,
2791            offset_in_first_frame_plaintext: 0,
2792            tar_member_group_size: 512,
2793            file_data_size: 0,
2794            flags: 0,
2795        };
2796        let frame = FrameEntry {
2797            frame_index: 0,
2798            envelope_index: 0,
2799            offset_in_envelope: 0,
2800            compressed_size: 128,
2801            decompressed_size: 512,
2802            flags: 0,
2803            tar_stream_offset: 0,
2804        };
2805        let envelope = EnvelopeEntry {
2806            envelope_index: 0,
2807            first_block_index: 10,
2808            data_block_count: 1,
2809            parity_block_count: 0,
2810            encrypted_size: 4096,
2811            plaintext_size: 128,
2812            first_frame_index: 0,
2813            frame_count: 1,
2814        };
2815        let shard = IndexShard {
2816            header: IndexShardHeader {
2817                version: 1,
2818                shard_index: 3,
2819                file_count: 0,
2820                frame_count: 0,
2821                envelope_count: 0,
2822                file_table_offset: 0,
2823                frame_table_offset: 0,
2824                envelope_table_offset: 0,
2825                string_pool_offset: 0,
2826                string_pool_size: 0,
2827            },
2828            files: vec![file.clone()],
2829            frames: vec![frame.clone()],
2830            envelopes: vec![envelope.clone()],
2831            string_pool: path.to_vec(),
2832            file_paths: Vec::new(),
2833            file_tar_member_group_starts: Vec::new(),
2834        };
2835        let locating = ShardEntry {
2836            shard_index: 3,
2837            first_block_index: 20,
2838            data_block_count: 1,
2839            parity_block_count: 0,
2840            encrypted_size: 4096,
2841            decompressed_size: shard.to_bytes().len() as u32,
2842            file_count: 1,
2843            first_path_hash: path_hash,
2844            last_path_hash: path_hash,
2845        };
2846        IndexShard::parse(&shard.to_bytes(), &locating, MetadataLimits::default()).unwrap();
2847
2848        let parse_with = |frames: Vec<FrameEntry>, envelopes: Vec<EnvelopeEntry>| {
2849            let mut mutated = shard.clone();
2850            mutated.frames = frames;
2851            mutated.envelopes = envelopes;
2852            let bytes = mutated.to_bytes();
2853            let locating = ShardEntry {
2854                decompressed_size: bytes.len() as u32,
2855                ..locating.clone()
2856            };
2857            IndexShard::parse(&bytes, &locating, MetadataLimits::default()).unwrap_err()
2858        };
2859
2860        let mut missing_frame = frame.clone();
2861        missing_frame.frame_index = 1;
2862        assert_eq!(
2863            parse_with(vec![missing_frame], vec![envelope.clone()]),
2864            FormatError::InvalidMetadata {
2865                structure: "FileEntry",
2866                reason: "referenced FrameEntry is missing",
2867            }
2868        );
2869
2870        let mut unreferenced_frame = frame.clone();
2871        unreferenced_frame.frame_index = 9;
2872        unreferenced_frame.tar_stream_offset = 1024;
2873        assert_eq!(
2874            parse_with(
2875                vec![frame.clone(), unreferenced_frame],
2876                vec![envelope.clone()]
2877            ),
2878            FormatError::InvalidMetadata {
2879                structure: "IndexShard",
2880                reason: "FrameEntry table is not the exact set referenced by FileEntry rows",
2881            }
2882        );
2883
2884        assert_eq!(
2885            parse_with(vec![frame.clone(), frame.clone()], vec![envelope.clone()]),
2886            FormatError::InvalidMetadata {
2887                structure: "IndexShard",
2888                reason: "FrameEntry rows are not sorted and unique",
2889            }
2890        );
2891
2892        let mut missing_envelope = envelope.clone();
2893        missing_envelope.envelope_index = 1;
2894        assert_eq!(
2895            parse_with(vec![frame.clone()], vec![missing_envelope]),
2896            FormatError::InvalidMetadata {
2897                structure: "FrameEntry",
2898                reason: "referenced EnvelopeEntry is missing",
2899            }
2900        );
2901
2902        let mut unreferenced_envelope = envelope.clone();
2903        unreferenced_envelope.envelope_index = 9;
2904        unreferenced_envelope.first_block_index = 11;
2905        unreferenced_envelope.first_frame_index = 9;
2906        assert_eq!(
2907            parse_with(
2908                vec![frame.clone()],
2909                vec![envelope.clone(), unreferenced_envelope]
2910            ),
2911            FormatError::InvalidMetadata {
2912                structure: "IndexShard",
2913                reason: "EnvelopeEntry table is not the exact set referenced by FrameEntry rows",
2914            }
2915        );
2916
2917        assert_eq!(
2918            parse_with(vec![frame], vec![envelope.clone(), envelope]),
2919            FormatError::InvalidMetadata {
2920                structure: "IndexShard",
2921                reason: "EnvelopeEntry rows are not sorted and unique",
2922            }
2923        );
2924    }
2925
2926    #[test]
2927    fn metadata_parsers_reject_malformed_buffer_corpus() {
2928        let limits = MetadataLimits::default();
2929        let path = b"file.txt";
2930        let path_hash = hash_prefix(path);
2931        let shard_entry = ShardEntry {
2932            shard_index: 0,
2933            first_block_index: 1,
2934            data_block_count: 1,
2935            parity_block_count: 0,
2936            encrypted_size: 4096,
2937            decompressed_size: 0,
2938            file_count: 1,
2939            first_path_hash: path_hash,
2940            last_path_hash: path_hash,
2941        };
2942
2943        let root = IndexRoot {
2944            header: IndexRootHeader {
2945                file_count: 1,
2946                frame_count: 1,
2947                envelope_count: 1,
2948                payload_block_count: 1,
2949                tar_total_size: 512,
2950                ..IndexRootHeader::empty()
2951            },
2952            shards: vec![ShardEntry {
2953                decompressed_size: 256,
2954                ..shard_entry.clone()
2955            }],
2956            directory_hint_shards: Vec::new(),
2957        };
2958        let root_bytes = root.to_bytes();
2959        IndexRoot::parse(&root_bytes, false, limits).unwrap();
2960
2961        assert_eq!(
2962            IndexRoot::parse(&root_bytes[..INDEX_ROOT_LEN - 1], false, limits).unwrap_err(),
2963            FormatError::InvalidMetadata {
2964                structure: "IndexRoot",
2965                reason: "plaintext is shorter than fixed header",
2966            }
2967        );
2968        let mut bad_root = root_bytes.clone();
2969        bad_root[0] ^= 1;
2970        assert_eq!(
2971            IndexRoot::parse(&bad_root, false, limits).unwrap_err(),
2972            FormatError::BadMagic {
2973                structure: "IndexRoot"
2974            }
2975        );
2976        let mut bad_root = root_bytes.clone();
2977        write_u32(&mut bad_root, 4, 2);
2978        assert_eq!(
2979            IndexRoot::parse(&bad_root, false, limits).unwrap_err(),
2980            FormatError::InvalidMetadata {
2981                structure: "IndexRoot",
2982                reason: "unsupported version",
2983            }
2984        );
2985        let mut bad_root = root_bytes.clone();
2986        bad_root[128] = 1;
2987        assert_eq!(
2988            IndexRoot::parse(&bad_root, false, limits).unwrap_err(),
2989            FormatError::NonZeroReserved {
2990                structure: "IndexRoot"
2991            }
2992        );
2993        let mut bad_root = root_bytes.clone();
2994        write_u64(&mut bad_root, 88, (INDEX_ROOT_LEN + 1) as u64);
2995        assert_eq!(
2996            IndexRoot::parse(&bad_root, false, limits).unwrap_err(),
2997            FormatError::InvalidMetadata {
2998                structure: "IndexRoot",
2999                reason: "shard table",
3000            }
3001        );
3002        assert_eq!(
3003            IndexRoot::parse(&root_bytes[..root_bytes.len() - 1], false, limits).unwrap_err(),
3004            FormatError::InvalidMetadata {
3005                structure: "IndexRoot",
3006                reason: "range is out of bounds",
3007            }
3008        );
3009        let mut bad_root = root_bytes.clone();
3010        bad_root.push(0);
3011        assert_eq!(
3012            IndexRoot::parse(&bad_root, false, limits).unwrap_err(),
3013            FormatError::InvalidMetadata {
3014                structure: "IndexRoot",
3015                reason: "plaintext length does not match canonical cursor",
3016            }
3017        );
3018
3019        let file = FileEntry {
3020            path_hash,
3021            path_offset: 0,
3022            path_length: path.len() as u32,
3023            first_frame_index: 0,
3024            frame_count: 1,
3025            offset_in_first_frame_plaintext: 0,
3026            tar_member_group_size: 512,
3027            file_data_size: 0,
3028            flags: 0,
3029        };
3030        let frame = FrameEntry {
3031            frame_index: 0,
3032            envelope_index: 0,
3033            offset_in_envelope: 0,
3034            compressed_size: 128,
3035            decompressed_size: 512,
3036            flags: 0x0000_0003,
3037            tar_stream_offset: 0,
3038        };
3039        let envelope = EnvelopeEntry {
3040            envelope_index: 0,
3041            first_block_index: 1,
3042            data_block_count: 1,
3043            parity_block_count: 0,
3044            encrypted_size: 4096,
3045            plaintext_size: 128,
3046            first_frame_index: 0,
3047            frame_count: 1,
3048        };
3049        let shard = IndexShard {
3050            header: IndexShardHeader {
3051                version: 1,
3052                shard_index: 0,
3053                file_count: 0,
3054                frame_count: 0,
3055                envelope_count: 0,
3056                file_table_offset: 0,
3057                frame_table_offset: 0,
3058                envelope_table_offset: 0,
3059                string_pool_offset: 0,
3060                string_pool_size: 0,
3061            },
3062            files: vec![file],
3063            frames: vec![frame],
3064            envelopes: vec![envelope],
3065            string_pool: path.to_vec(),
3066            file_paths: Vec::new(),
3067            file_tar_member_group_starts: Vec::new(),
3068        };
3069        let shard_bytes = shard.to_bytes();
3070        let locating = ShardEntry {
3071            decompressed_size: shard_bytes.len() as u32,
3072            ..shard_entry
3073        };
3074        IndexShard::parse(&shard_bytes, &locating, limits).unwrap();
3075
3076        assert_eq!(
3077            IndexShard::parse(
3078                &shard_bytes[..INDEX_SHARD_HEADER_LEN - 1],
3079                &locating,
3080                limits
3081            )
3082            .unwrap_err(),
3083            FormatError::InvalidMetadata {
3084                structure: "IndexShard",
3085                reason: "plaintext is shorter than fixed header",
3086            }
3087        );
3088        let mut bad_shard = shard_bytes.clone();
3089        bad_shard[0] ^= 1;
3090        assert_eq!(
3091            IndexShard::parse(&bad_shard, &locating, limits).unwrap_err(),
3092            FormatError::BadMagic {
3093                structure: "IndexShard"
3094            }
3095        );
3096        let mut bad_shard = shard_bytes.clone();
3097        bad_shard[48] = 1;
3098        assert_eq!(
3099            IndexShard::parse(&bad_shard, &locating, limits).unwrap_err(),
3100            FormatError::NonZeroReserved {
3101                structure: "IndexShard"
3102            }
3103        );
3104        let mut bad_shard = shard_bytes.clone();
3105        write_u32(&mut bad_shard, 28, INDEX_SHARD_HEADER_LEN as u32 + 1);
3106        assert_eq!(
3107            IndexShard::parse(&bad_shard, &locating, limits).unwrap_err(),
3108            FormatError::InvalidMetadata {
3109                structure: "IndexShard",
3110                reason: "file table",
3111            }
3112        );
3113        assert_eq!(
3114            IndexShard::parse(&shard_bytes[..shard_bytes.len() - 1], &locating, limits)
3115                .unwrap_err(),
3116            FormatError::InvalidMetadata {
3117                structure: "IndexShard",
3118                reason: "range is out of bounds",
3119            }
3120        );
3121        let mut bad_shard = shard_bytes.clone();
3122        bad_shard.push(0);
3123        assert_eq!(
3124            IndexShard::parse(&bad_shard, &locating, limits).unwrap_err(),
3125            FormatError::InvalidMetadata {
3126                structure: "IndexShard",
3127                reason: "plaintext length does not match canonical cursor",
3128            }
3129        );
3130
3131        let dir_path = b"dir".to_vec();
3132        let dir_hash = hash_prefix(&dir_path);
3133        let table = DirectoryHintTable {
3134            header: DirectoryHintTableHeader {
3135                version: 1,
3136                hint_shard_index: 0,
3137                entry_count: 0,
3138                entry_table_offset: 0,
3139                shard_list_offset: 0,
3140                string_pool_offset: 0,
3141                string_pool_size: 0,
3142            },
3143            entries: vec![DirectoryHintEntry {
3144                dir_hash,
3145                path_offset: 0,
3146                path_length: dir_path.len() as u32,
3147                shard_list_start_index: 0,
3148                shard_count: 1,
3149            }],
3150            shard_row_indexes: vec![0],
3151            string_pool: dir_path.clone(),
3152            entry_paths: Vec::new(),
3153        };
3154        let table_bytes = table.to_bytes();
3155        let locating_hint = DirectoryHintShardEntry {
3156            hint_shard_index: 0,
3157            first_dir_hash: dir_hash,
3158            last_dir_hash: dir_hash,
3159            first_block_index: 2,
3160            data_block_count: 1,
3161            parity_block_count: 0,
3162            encrypted_size: 4096,
3163            decompressed_size: table_bytes.len() as u32,
3164            entry_count: 1,
3165        };
3166        DirectoryHintTable::parse(&table_bytes, &locating_hint, 1, limits).unwrap();
3167
3168        assert_eq!(
3169            DirectoryHintTable::parse(
3170                &table_bytes[..DIRECTORY_HINT_TABLE_LEN - 1],
3171                &locating_hint,
3172                1,
3173                limits,
3174            )
3175            .unwrap_err(),
3176            FormatError::InvalidMetadata {
3177                structure: "DirectoryHintTable",
3178                reason: "plaintext is shorter than fixed header",
3179            }
3180        );
3181        let mut bad_table = table_bytes.clone();
3182        bad_table[0] ^= 1;
3183        assert_eq!(
3184            DirectoryHintTable::parse(&bad_table, &locating_hint, 1, limits).unwrap_err(),
3185            FormatError::BadMagic {
3186                structure: "DirectoryHintTable"
3187            }
3188        );
3189        let mut bad_table = table_bytes.clone();
3190        bad_table[56] = 1;
3191        assert_eq!(
3192            DirectoryHintTable::parse(&bad_table, &locating_hint, 1, limits).unwrap_err(),
3193            FormatError::NonZeroReserved {
3194                structure: "DirectoryHintTable"
3195            }
3196        );
3197        let mut bad_table = table_bytes.clone();
3198        write_u64(&mut bad_table, 24, DIRECTORY_HINT_TABLE_LEN as u64 + 1);
3199        assert_eq!(
3200            DirectoryHintTable::parse(&bad_table, &locating_hint, 1, limits).unwrap_err(),
3201            FormatError::InvalidMetadata {
3202                structure: "DirectoryHintTable",
3203                reason: "entry table",
3204            }
3205        );
3206        assert_eq!(
3207            DirectoryHintTable::parse(
3208                &table_bytes[..table_bytes.len() - 1],
3209                &locating_hint,
3210                1,
3211                limits
3212            )
3213            .unwrap_err(),
3214            FormatError::InvalidMetadata {
3215                structure: "DirectoryHintTable",
3216                reason: "range is out of bounds",
3217            }
3218        );
3219        let mut bad_table = table_bytes.clone();
3220        bad_table.push(0);
3221        assert_eq!(
3222            DirectoryHintTable::parse(&bad_table, &locating_hint, 1, limits).unwrap_err(),
3223            FormatError::InvalidMetadata {
3224                structure: "DirectoryHintTable",
3225                reason: "plaintext length does not match canonical cursor",
3226            }
3227        );
3228    }
3229
3230    #[test]
3231    fn candidate_path_lookup_uses_supplied_collision_cap() {
3232        let path = b"same-prefix.txt";
3233        let hash = hash_prefix(path);
3234        let root = IndexRoot {
3235            header: IndexRootHeader::empty(),
3236            shards: (0..3)
3237                .map(|idx| ShardEntry {
3238                    shard_index: idx,
3239                    first_block_index: idx,
3240                    data_block_count: 1,
3241                    parity_block_count: 1,
3242                    encrypted_size: 4096,
3243                    decompressed_size: 256,
3244                    file_count: 1,
3245                    first_path_hash: hash,
3246                    last_path_hash: hash,
3247                })
3248                .collect(),
3249            directory_hint_shards: Vec::new(),
3250        };
3251
3252        let mut limits = MetadataLimits::default();
3253        limits.max_hash_collision_shard_scan = 0;
3254        assert_eq!(
3255            root.candidate_shards_for_path(path, limits).unwrap_err(),
3256            FormatError::HashPrefixCollisionRunExceeded
3257        );
3258
3259        limits.max_hash_collision_shard_scan = 2;
3260        assert_eq!(
3261            root.candidate_shards_for_path(path, limits).unwrap(),
3262            vec![0, 1, 2]
3263        );
3264    }
3265
3266    #[test]
3267    fn parses_single_shard_and_finds_final_file_entry() {
3268        let path = b"file.txt";
3269        let path_hash = hash_prefix(path);
3270        let file = FileEntry {
3271            path_hash,
3272            path_offset: 0,
3273            path_length: path.len() as u32,
3274            first_frame_index: 0,
3275            frame_count: 1,
3276            offset_in_first_frame_plaintext: 0,
3277            tar_member_group_size: 512,
3278            file_data_size: 0,
3279            flags: 0,
3280        };
3281        let frame = FrameEntry {
3282            frame_index: 0,
3283            envelope_index: 0,
3284            offset_in_envelope: 0,
3285            compressed_size: 128,
3286            decompressed_size: 512,
3287            flags: 0,
3288            tar_stream_offset: 0,
3289        };
3290        let envelope = EnvelopeEntry {
3291            envelope_index: 0,
3292            first_block_index: 0,
3293            data_block_count: 1,
3294            parity_block_count: 1,
3295            encrypted_size: 4096,
3296            plaintext_size: 128,
3297            first_frame_index: 0,
3298            frame_count: 1,
3299        };
3300        let shard = IndexShard {
3301            header: IndexShardHeader {
3302                version: 1,
3303                shard_index: 7,
3304                file_count: 0,
3305                frame_count: 0,
3306                envelope_count: 0,
3307                file_table_offset: 0,
3308                frame_table_offset: 0,
3309                envelope_table_offset: 0,
3310                string_pool_offset: 0,
3311                string_pool_size: 0,
3312            },
3313            files: vec![file],
3314            frames: vec![frame],
3315            envelopes: vec![envelope],
3316            string_pool: path.to_vec(),
3317            file_paths: Vec::new(),
3318            file_tar_member_group_starts: Vec::new(),
3319        };
3320        let locating = ShardEntry {
3321            shard_index: 7,
3322            first_block_index: 10,
3323            data_block_count: 1,
3324            parity_block_count: 1,
3325            encrypted_size: 4096,
3326            decompressed_size: shard.to_bytes().len() as u32,
3327            file_count: 1,
3328            first_path_hash: path_hash,
3329            last_path_hash: path_hash,
3330        };
3331
3332        let parsed =
3333            IndexShard::parse(&shard.to_bytes(), &locating, MetadataLimits::default()).unwrap();
3334
3335        assert_eq!(parsed.lookup_file_index(path), Some(0));
3336        assert_eq!(parsed.file_path(0), Some(path.as_slice()));
3337    }
3338}