1use std::collections::{BTreeSet, HashMap, HashSet};
2
3use sha2::{Digest, Sha256};
4use unicode_normalization::UnicodeNormalization;
5
6use crate::format::FormatError;
7
/// Magic bytes found at offset 0 of a serialized `IndexRoot`.
const TZIR_MAGIC: [u8; 4] = [b'T', b'Z', b'I', b'R'];
/// Magic bytes found at offset 0 of a serialized `IndexShard`.
const TZIS_MAGIC: [u8; 4] = [b'T', b'Z', b'I', b'S'];
/// Magic bytes found at offset 0 of a serialized `DirectoryHintTable`.
const TZDH_MAGIC: [u8; 4] = [b'T', b'Z', b'D', b'H'];

/// Byte length of the fixed `IndexRoot` header.
pub const INDEX_ROOT_LEN: usize = 160;
/// Byte length of one serialized `ShardEntry` row.
pub const SHARD_ENTRY_LEN: usize = 52;
/// Byte length of one serialized `DirectoryHintShardEntry` row.
pub const DIRECTORY_HINT_SHARD_ENTRY_LEN: usize = 56;
/// Byte length of one serialized `EnvelopeEntry` row.
pub const ENVELOPE_ENTRY_LEN: usize = 48;
/// Byte length of one serialized `FrameEntry` row.
pub const FRAME_ENTRY_LEN: usize = 44;
/// Byte length of the fixed `IndexShard` header.
pub const INDEX_SHARD_HEADER_LEN: usize = 64;
/// Byte length of one serialized `FileEntry` row.
pub const FILE_ENTRY_LEN: usize = 56;
/// Byte length of the fixed `DirectoryHintTable` header.
pub const DIRECTORY_HINT_TABLE_LEN: usize = 72;
/// Byte length of one serialized `DirectoryHintEntry` row.
pub const DIRECTORY_HINT_ENTRY_LEN: usize = 40;

// NOTE(review): presumably the mask of frame flag bits this implementation
// understands; its use is outside this part of the file.
const FRAME_KNOWN_FLAGS: u32 = 0x0000_0003;
/// Default for `MetadataLimits::max_hash_collision_shard_scan`.
const DEFAULT_MAX_HASH_COLLISION_SHARD_SCAN: usize = 16;
// NOTE(review): presumably the maximum data + parity shard total for the FEC
// code; its use is outside this part of the file.
const REED_SOLOMON_GF16_MAX_TOTAL_SHARDS: u64 = 65_535;
/// SHA-256 digest of the empty byte string; used as the content hash of an
/// empty `IndexRootHeader`.
const SHA256_EMPTY: [u8; 32] = [
    0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
    0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
    0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
    0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
];
29
/// Resource caps and sizing parameters applied while parsing metadata.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct MetadataLimits {
    /// Block size handed to the encrypted-extent validation of table rows.
    pub block_size: u32,
    /// Upper bound handed to path validation.
    pub max_path_length: u32,
    /// Scan cap (per direction) used when collecting candidate shards for a
    /// path hash.
    pub max_hash_collision_shard_scan: usize,
    /// Largest `shard_count` accepted in an `IndexRoot` header.
    pub max_shard_count: u32,
    /// Largest `directory_hint_shard_count` accepted in an `IndexRoot` header.
    pub max_directory_hint_shards: u32,
    /// Largest `file_count` accepted in an `IndexShard` header.
    pub max_files_per_index_shard: u32,
    /// Largest `entry_count` accepted in a `DirectoryHintTable` header.
    pub max_entries_per_directory_hint_shard: u64,
    // NOTE(review): the payload and index-root FEC caps below are not read in
    // this part of the file; presumably they bound data/parity shard counts of
    // those classes the same way the index caps do.
    pub max_payload_data_shards: u16,
    pub max_payload_parity_shards: u16,
    /// Data-shard cap handed to the FEC extent validation of `ShardEntry` rows.
    pub max_index_data_shards: u16,
    /// Parity-shard cap handed to the FEC extent validation of `ShardEntry` rows.
    pub max_index_parity_shards: u16,
    pub max_index_root_data_shards: u16,
    pub max_index_root_parity_shards: u16,
}
46
47impl Default for MetadataLimits {
48 fn default() -> Self {
49 Self {
50 block_size: 4096,
51 max_path_length: 4096,
52 max_hash_collision_shard_scan: DEFAULT_MAX_HASH_COLLISION_SHARD_SCAN,
53 max_shard_count: 1_000_000,
54 max_directory_hint_shards: 1_000_000,
55 max_files_per_index_shard: 1_000_000,
56 max_entries_per_directory_hint_shard: 1_000_000,
57 max_payload_data_shards: u16::MAX,
58 max_payload_parity_shards: u16::MAX,
59 max_index_data_shards: u16::MAX,
60 max_index_parity_shards: u16::MAX,
61 max_index_root_data_shards: u16::MAX,
62 max_index_root_parity_shards: u16::MAX,
63 }
64 }
65}
66
/// Fixed-size header of a serialized `IndexRoot` (`INDEX_ROOT_LEN` bytes).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct IndexRootHeader {
    /// Format version; `IndexRoot::parse` accepts only `1`.
    pub version: u32,
    /// Number of `ShardEntry` rows following the header.
    pub shard_count: u32,
    /// Number of `DirectoryHintShardEntry` rows following the shard table.
    pub directory_hint_shard_count: u32,
    pub frame_count: u64,
    pub envelope_count: u64,
    pub file_count: u64,
    pub payload_block_count: u64,
    pub tar_total_size: u64,
    /// 32-byte content digest stored at header bytes 56..88.
    pub content_sha256: [u8; 32],
    /// Byte offset of the shard table; zero when there are no shards.
    pub shard_table_offset: u64,
    /// Byte offset of the directory hint shard table; zero when it is absent.
    pub directory_hint_shard_table_offset: u64,
    // The dictionary fields are checked together against the caller's
    // `has_dictionary` flag during parsing.
    pub dictionary_first_block: u64,
    pub dictionary_data_block_count: u32,
    pub dictionary_parity_block_count: u32,
    pub dictionary_encrypted_size: u32,
    pub dictionary_decompressed_size: u32,
}
86
87#[derive(Debug, Clone, PartialEq, Eq)]
88pub struct IndexRoot {
89 pub header: IndexRootHeader,
90 pub shards: Vec<ShardEntry>,
91 pub directory_hint_shards: Vec<DirectoryHintShardEntry>,
92}
93
/// One row of the `IndexRoot` shard table (`SHARD_ENTRY_LEN` bytes).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ShardEntry {
    /// Index of the shard; must be unique within the table.
    pub shard_index: u64,
    pub first_block_index: u64,
    pub data_block_count: u32,
    pub parity_block_count: u32,
    pub encrypted_size: u32,
    /// Must be non-zero for a parsed row.
    pub decompressed_size: u32,
    /// Number of files in the shard; must be non-zero for a parsed row.
    pub file_count: u32,
    /// Lower bound of the shard's path-hash range (must not exceed
    /// `last_path_hash`).
    pub first_path_hash: [u8; 8],
    /// Upper bound of the shard's path-hash range.
    pub last_path_hash: [u8; 8],
}
106
/// One row of the `IndexRoot` directory hint shard table
/// (`DIRECTORY_HINT_SHARD_ENTRY_LEN` bytes).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DirectoryHintShardEntry {
    /// Index of the hint shard; compared with the header of the table it
    /// locates.
    pub hint_shard_index: u64,
    pub first_dir_hash: [u8; 8],
    pub last_dir_hash: [u8; 8],
    pub first_block_index: u64,
    pub data_block_count: u32,
    pub parity_block_count: u32,
    pub encrypted_size: u32,
    pub decompressed_size: u32,
    /// Number of entries; compared with the header of the table it locates.
    pub entry_count: u64,
}
119
/// One row of an `IndexShard` envelope table (`ENVELOPE_ENTRY_LEN` bytes).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EnvelopeEntry {
    pub envelope_index: u64,
    pub first_block_index: u64,
    pub data_block_count: u32,
    pub parity_block_count: u32,
    pub encrypted_size: u32,
    pub plaintext_size: u32,
    pub first_frame_index: u64,
    pub frame_count: u32,
}
131
/// One row of an `IndexShard` frame table (`FRAME_ENTRY_LEN` bytes).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FrameEntry {
    pub frame_index: u64,
    pub envelope_index: u64,
    pub offset_in_envelope: u32,
    pub compressed_size: u32,
    pub decompressed_size: u32,
    pub flags: u32,
    pub tar_stream_offset: u64,
}
142
/// Fixed-size header of a serialized `IndexShard`
/// (`INDEX_SHARD_HEADER_LEN` bytes).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct IndexShardHeader {
    /// Format version; `IndexShard::parse` accepts only `1`.
    pub version: u32,
    /// Must equal the `shard_index` of the locating `ShardEntry`.
    pub shard_index: u64,
    /// Number of file rows; non-zero and equal to the locating row's count.
    pub file_count: u32,
    /// Number of frame rows.
    pub frame_count: u32,
    /// Number of envelope rows.
    pub envelope_count: u32,
    /// Byte offset of the file table.
    pub file_table_offset: u32,
    /// Byte offset of the frame table.
    pub frame_table_offset: u32,
    /// Byte offset of the envelope table.
    pub envelope_table_offset: u32,
    /// Byte offset of the string pool; zero when the pool is empty.
    pub string_pool_offset: u32,
    /// Byte length of the string pool.
    pub string_pool_size: u32,
}
156
157#[derive(Debug, Clone, PartialEq, Eq)]
158pub struct IndexShard {
159 pub header: IndexShardHeader,
160 pub files: Vec<FileEntry>,
161 pub frames: Vec<FrameEntry>,
162 pub envelopes: Vec<EnvelopeEntry>,
163 pub string_pool: Vec<u8>,
164 file_paths: Vec<Vec<u8>>,
165 file_tar_member_group_starts: Vec<u64>,
166}
167
/// One row of an `IndexShard` file table (`FILE_ENTRY_LEN` bytes).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FileEntry {
    /// 8-byte path hash; lookups compare it with `hash_prefix` of the path.
    pub path_hash: [u8; 8],
    pub path_offset: u32,
    pub path_length: u32,
    pub first_frame_index: u64,
    pub frame_count: u32,
    pub offset_in_first_frame_plaintext: u32,
    pub tar_member_group_size: u64,
    pub file_data_size: u64,
    pub flags: u32,
}
180
181#[derive(Debug, Clone, PartialEq, Eq)]
182pub struct DirectoryHintTable {
183 pub header: DirectoryHintTableHeader,
184 pub entries: Vec<DirectoryHintEntry>,
185 pub shard_row_indexes: Vec<u32>,
186 pub string_pool: Vec<u8>,
187 entry_paths: Vec<Vec<u8>>,
188}
189
/// Fixed-size header of a serialized `DirectoryHintTable`
/// (`DIRECTORY_HINT_TABLE_LEN` bytes).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DirectoryHintTableHeader {
    /// Format version; `DirectoryHintTable::parse` accepts only `1`.
    pub version: u32,
    /// Must equal the `hint_shard_index` of the locating row.
    pub hint_shard_index: u64,
    /// Number of entries; non-zero and equal to the locating row's count.
    pub entry_count: u64,
    /// Byte offset of the entry table.
    pub entry_table_offset: u64,
    /// Byte offset of the shard row list.
    pub shard_list_offset: u64,
    /// Byte offset of the string pool; zero when the pool is empty.
    pub string_pool_offset: u64,
    /// Byte length of the string pool.
    pub string_pool_size: u64,
}
200
/// One row of a `DirectoryHintTable` entry table
/// (`DIRECTORY_HINT_ENTRY_LEN` bytes).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DirectoryHintEntry {
    /// 8-byte directory hash; lookups compare it with `hash_prefix` of the
    /// directory path.
    pub dir_hash: [u8; 8],
    pub path_offset: u64,
    pub path_length: u32,
    /// Start of this entry's range within the table's shard row list.
    pub shard_list_start_index: u32,
    /// Length of this entry's range within the table's shard row list.
    pub shard_count: u32,
}
209
210impl IndexRoot {
211 pub fn parse(
212 bytes: &[u8],
213 has_dictionary: bool,
214 limits: MetadataLimits,
215 ) -> Result<Self, FormatError> {
216 let structure = "IndexRoot";
217 if bytes.len() < INDEX_ROOT_LEN {
218 return invalid(structure, "plaintext is shorter than fixed header");
219 }
220 expect_magic(structure, TZIR_MAGIC, read_array::<4>(bytes, 0, structure)?)?;
221 expect_zero(structure, slice(bytes, 128, 32, structure)?)?;
222
223 let header = IndexRootHeader {
224 version: read_u32(bytes, 4, structure)?,
225 shard_count: read_u32(bytes, 8, structure)?,
226 directory_hint_shard_count: read_u32(bytes, 12, structure)?,
227 frame_count: read_u64(bytes, 16, structure)?,
228 envelope_count: read_u64(bytes, 24, structure)?,
229 file_count: read_u64(bytes, 32, structure)?,
230 payload_block_count: read_u64(bytes, 40, structure)?,
231 tar_total_size: read_u64(bytes, 48, structure)?,
232 content_sha256: read_array::<32>(bytes, 56, structure)?,
233 shard_table_offset: read_u64(bytes, 88, structure)?,
234 directory_hint_shard_table_offset: read_u64(bytes, 96, structure)?,
235 dictionary_first_block: read_u64(bytes, 104, structure)?,
236 dictionary_data_block_count: read_u32(bytes, 112, structure)?,
237 dictionary_parity_block_count: read_u32(bytes, 116, structure)?,
238 dictionary_encrypted_size: read_u32(bytes, 120, structure)?,
239 dictionary_decompressed_size: read_u32(bytes, 124, structure)?,
240 };
241
242 if header.version != 1 {
243 return invalid(structure, "unsupported version");
244 }
245 if header.shard_count > limits.max_shard_count {
246 return invalid(structure, "shard count exceeds resource cap");
247 }
248 if header.directory_hint_shard_count > limits.max_directory_hint_shards {
249 return invalid(structure, "directory hint shard count exceeds resource cap");
250 }
251 validate_dictionary_fields(&header, has_dictionary, limits)?;
252
253 let mut cursor = INDEX_ROOT_LEN;
254 let shards = if header.shard_count == 0 {
255 if header.shard_table_offset != 0 {
256 return invalid(structure, "absent shard table has non-zero offset");
257 }
258 Vec::new()
259 } else {
260 expect_offset(structure, "shard table", header.shard_table_offset, cursor)?;
261 let count = to_usize(header.shard_count as u64, structure)?;
262 let bytes_len = checked_mul(count, SHARD_ENTRY_LEN, structure)?;
263 let table = slice(bytes, cursor, bytes_len, structure)?;
264 cursor = checked_add(cursor, bytes_len, structure)?;
265 parse_shard_entries(table, limits)?
266 };
267
268 let directory_hint_shards = if header.directory_hint_shard_count == 0 {
269 if header.directory_hint_shard_table_offset != 0 {
270 return invalid(
271 structure,
272 "absent directory hint shard table has non-zero offset",
273 );
274 }
275 Vec::new()
276 } else {
277 if header.shard_count == 0 {
278 return invalid(structure, "directory hints require at least one shard");
279 }
280 expect_offset(
281 structure,
282 "directory hint shard table",
283 header.directory_hint_shard_table_offset,
284 cursor,
285 )?;
286 let count = to_usize(header.directory_hint_shard_count as u64, structure)?;
287 let bytes_len = checked_mul(count, DIRECTORY_HINT_SHARD_ENTRY_LEN, structure)?;
288 let table = slice(bytes, cursor, bytes_len, structure)?;
289 cursor = checked_add(cursor, bytes_len, structure)?;
290 parse_directory_hint_shard_entries(table, limits)?
291 };
292
293 if bytes.len() != cursor {
294 return invalid(
295 structure,
296 "plaintext length does not match canonical cursor",
297 );
298 }
299 validate_index_root_totals(&header, &shards, has_dictionary)?;
300
301 Ok(Self {
302 header,
303 shards,
304 directory_hint_shards,
305 })
306 }
307
308 pub fn to_bytes(&self) -> Vec<u8> {
309 let mut header = self.header.clone();
310 header.shard_count = self.shards.len() as u32;
311 header.directory_hint_shard_count = self.directory_hint_shards.len() as u32;
312 header.shard_table_offset = if self.shards.is_empty() {
313 0
314 } else {
315 INDEX_ROOT_LEN as u64
316 };
317 header.directory_hint_shard_table_offset = if self.directory_hint_shards.is_empty() {
318 0
319 } else {
320 (INDEX_ROOT_LEN + self.shards.len() * SHARD_ENTRY_LEN) as u64
321 };
322
323 let mut bytes = Vec::with_capacity(
324 INDEX_ROOT_LEN
325 + self.shards.len() * SHARD_ENTRY_LEN
326 + self.directory_hint_shards.len() * DIRECTORY_HINT_SHARD_ENTRY_LEN,
327 );
328 bytes.extend_from_slice(&header.to_bytes());
329 for entry in &self.shards {
330 bytes.extend_from_slice(&entry.to_bytes());
331 }
332 for entry in &self.directory_hint_shards {
333 bytes.extend_from_slice(&entry.to_bytes());
334 }
335 bytes
336 }
337
338 pub fn candidate_shard_indexes_for_hash(
339 &self,
340 target_hash: [u8; 8],
341 scan_cap_per_direction: usize,
342 ) -> Result<Vec<usize>, FormatError> {
343 candidate_interval_indexes(
344 &self.shards,
345 target_hash,
346 scan_cap_per_direction,
347 |entry| entry.first_path_hash,
348 |entry| entry.last_path_hash,
349 )
350 }
351
352 pub fn candidate_shards_for_path(
353 &self,
354 normalized_path: &[u8],
355 limits: MetadataLimits,
356 ) -> Result<Vec<usize>, FormatError> {
357 self.candidate_shard_indexes_for_hash(
358 hash_prefix(normalized_path),
359 limits.max_hash_collision_shard_scan,
360 )
361 }
362}
363
364impl IndexRootHeader {
365 pub fn empty() -> Self {
366 Self {
367 version: 1,
368 shard_count: 0,
369 directory_hint_shard_count: 0,
370 frame_count: 0,
371 envelope_count: 0,
372 file_count: 0,
373 payload_block_count: 0,
374 tar_total_size: 0,
375 content_sha256: SHA256_EMPTY,
376 shard_table_offset: 0,
377 directory_hint_shard_table_offset: 0,
378 dictionary_first_block: 0,
379 dictionary_data_block_count: 0,
380 dictionary_parity_block_count: 0,
381 dictionary_encrypted_size: 0,
382 dictionary_decompressed_size: 0,
383 }
384 }
385
386 pub fn to_bytes(&self) -> [u8; INDEX_ROOT_LEN] {
387 let mut bytes = [0u8; INDEX_ROOT_LEN];
388 bytes[0..4].copy_from_slice(&TZIR_MAGIC);
389 write_u32(&mut bytes, 4, self.version);
390 write_u32(&mut bytes, 8, self.shard_count);
391 write_u32(&mut bytes, 12, self.directory_hint_shard_count);
392 write_u64(&mut bytes, 16, self.frame_count);
393 write_u64(&mut bytes, 24, self.envelope_count);
394 write_u64(&mut bytes, 32, self.file_count);
395 write_u64(&mut bytes, 40, self.payload_block_count);
396 write_u64(&mut bytes, 48, self.tar_total_size);
397 bytes[56..88].copy_from_slice(&self.content_sha256);
398 write_u64(&mut bytes, 88, self.shard_table_offset);
399 write_u64(&mut bytes, 96, self.directory_hint_shard_table_offset);
400 write_u64(&mut bytes, 104, self.dictionary_first_block);
401 write_u32(&mut bytes, 112, self.dictionary_data_block_count);
402 write_u32(&mut bytes, 116, self.dictionary_parity_block_count);
403 write_u32(&mut bytes, 120, self.dictionary_encrypted_size);
404 write_u32(&mut bytes, 124, self.dictionary_decompressed_size);
405 bytes
406 }
407}
408
409impl ShardEntry {
410 pub fn to_bytes(&self) -> [u8; SHARD_ENTRY_LEN] {
411 let mut bytes = [0u8; SHARD_ENTRY_LEN];
412 write_u64(&mut bytes, 0, self.shard_index);
413 write_u64(&mut bytes, 8, self.first_block_index);
414 write_u32(&mut bytes, 16, self.data_block_count);
415 write_u32(&mut bytes, 20, self.parity_block_count);
416 write_u32(&mut bytes, 24, self.encrypted_size);
417 write_u32(&mut bytes, 28, self.decompressed_size);
418 write_u32(&mut bytes, 32, self.file_count);
419 bytes[36..44].copy_from_slice(&self.first_path_hash);
420 bytes[44..52].copy_from_slice(&self.last_path_hash);
421 bytes
422 }
423}
424
425impl DirectoryHintShardEntry {
426 pub fn to_bytes(&self) -> [u8; DIRECTORY_HINT_SHARD_ENTRY_LEN] {
427 let mut bytes = [0u8; DIRECTORY_HINT_SHARD_ENTRY_LEN];
428 write_u64(&mut bytes, 0, self.hint_shard_index);
429 bytes[8..16].copy_from_slice(&self.first_dir_hash);
430 bytes[16..24].copy_from_slice(&self.last_dir_hash);
431 write_u64(&mut bytes, 24, self.first_block_index);
432 write_u32(&mut bytes, 32, self.data_block_count);
433 write_u32(&mut bytes, 36, self.parity_block_count);
434 write_u32(&mut bytes, 40, self.encrypted_size);
435 write_u32(&mut bytes, 44, self.decompressed_size);
436 write_u64(&mut bytes, 48, self.entry_count);
437 bytes
438 }
439}
440
441impl IndexShard {
442 pub fn parse(
443 bytes: &[u8],
444 locating_shard: &ShardEntry,
445 limits: MetadataLimits,
446 ) -> Result<Self, FormatError> {
447 let structure = "IndexShard";
448 if bytes.len() < INDEX_SHARD_HEADER_LEN {
449 return invalid(structure, "plaintext is shorter than fixed header");
450 }
451 expect_magic(structure, TZIS_MAGIC, read_array::<4>(bytes, 0, structure)?)?;
452 expect_zero(structure, slice(bytes, 48, 16, structure)?)?;
453
454 let header = IndexShardHeader {
455 version: read_u32(bytes, 4, structure)?,
456 shard_index: read_u64(bytes, 8, structure)?,
457 file_count: read_u32(bytes, 16, structure)?,
458 frame_count: read_u32(bytes, 20, structure)?,
459 envelope_count: read_u32(bytes, 24, structure)?,
460 file_table_offset: read_u32(bytes, 28, structure)?,
461 frame_table_offset: read_u32(bytes, 32, structure)?,
462 envelope_table_offset: read_u32(bytes, 36, structure)?,
463 string_pool_offset: read_u32(bytes, 40, structure)?,
464 string_pool_size: read_u32(bytes, 44, structure)?,
465 };
466
467 if header.version != 1 {
468 return invalid(structure, "unsupported version");
469 }
470 if header.file_count == 0 {
471 return invalid(structure, "index shard must contain at least one file");
472 }
473 if header.file_count > limits.max_files_per_index_shard {
474 return invalid(structure, "file count exceeds resource cap");
475 }
476 if header.shard_index != locating_shard.shard_index {
477 return invalid(structure, "shard index does not match locating ShardEntry");
478 }
479 if header.file_count != locating_shard.file_count {
480 return invalid(structure, "file count does not match locating ShardEntry");
481 }
482
483 let mut cursor = INDEX_SHARD_HEADER_LEN;
484 let files = parse_counted_table(
485 bytes,
486 structure,
487 "file table",
488 header.file_count as u64,
489 header.file_table_offset as u64,
490 FILE_ENTRY_LEN,
491 &mut cursor,
492 parse_file_entry,
493 )?;
494 let frames = parse_counted_table(
495 bytes,
496 structure,
497 "frame table",
498 header.frame_count as u64,
499 header.frame_table_offset as u64,
500 FRAME_ENTRY_LEN,
501 &mut cursor,
502 parse_frame_entry,
503 )?;
504 let envelopes = parse_counted_table(
505 bytes,
506 structure,
507 "envelope table",
508 header.envelope_count as u64,
509 header.envelope_table_offset as u64,
510 ENVELOPE_ENTRY_LEN,
511 &mut cursor,
512 parse_envelope_entry,
513 )?;
514 let string_pool = if header.string_pool_size == 0 {
515 if header.string_pool_offset != 0 {
516 return invalid(structure, "absent string pool has non-zero offset");
517 }
518 Vec::new()
519 } else {
520 expect_offset(
521 structure,
522 "string pool",
523 header.string_pool_offset as u64,
524 cursor,
525 )?;
526 let len = header.string_pool_size as usize;
527 let pool = slice(bytes, cursor, len, structure)?.to_vec();
528 cursor = checked_add(cursor, len, structure)?;
529 pool
530 };
531 if bytes.len() != cursor {
532 return invalid(
533 structure,
534 "plaintext length does not match canonical cursor",
535 );
536 }
537
538 let (file_paths, file_tar_member_group_starts) = validate_index_shard_tables(
539 &files,
540 &frames,
541 &envelopes,
542 &string_pool,
543 locating_shard,
544 limits,
545 )?;
546
547 Ok(Self {
548 header,
549 files,
550 frames,
551 envelopes,
552 string_pool,
553 file_paths,
554 file_tar_member_group_starts,
555 })
556 }
557
558 pub fn to_bytes(&self) -> Vec<u8> {
559 let mut header = self.header.clone();
560 header.file_count = self.files.len() as u32;
561 header.frame_count = self.frames.len() as u32;
562 header.envelope_count = self.envelopes.len() as u32;
563
564 let mut cursor = INDEX_SHARD_HEADER_LEN;
565 header.file_table_offset = table_offset(self.files.len(), cursor);
566 cursor += self.files.len() * FILE_ENTRY_LEN;
567 header.frame_table_offset = table_offset(self.frames.len(), cursor);
568 cursor += self.frames.len() * FRAME_ENTRY_LEN;
569 header.envelope_table_offset = table_offset(self.envelopes.len(), cursor);
570 cursor += self.envelopes.len() * ENVELOPE_ENTRY_LEN;
571 header.string_pool_size = self.string_pool.len() as u32;
572 header.string_pool_offset = table_offset(self.string_pool.len(), cursor);
573
574 let mut bytes = Vec::with_capacity(cursor + self.string_pool.len());
575 bytes.extend_from_slice(&header.to_bytes());
576 for entry in &self.files {
577 bytes.extend_from_slice(&entry.to_bytes());
578 }
579 for entry in &self.frames {
580 bytes.extend_from_slice(&entry.to_bytes());
581 }
582 for entry in &self.envelopes {
583 bytes.extend_from_slice(&entry.to_bytes());
584 }
585 bytes.extend_from_slice(&self.string_pool);
586 bytes
587 }
588
589 pub fn file_path(&self, file_index: usize) -> Option<&[u8]> {
590 self.file_paths.get(file_index).map(Vec::as_slice)
591 }
592
593 pub fn tar_member_group_start(&self, file_index: usize) -> Option<u64> {
594 self.file_tar_member_group_starts.get(file_index).copied()
595 }
596
597 pub fn lookup_file_index(&self, normalized_path: &[u8]) -> Option<usize> {
598 let target_hash = hash_prefix(normalized_path);
599 let lower = self.lower_bound_file_key(target_hash, normalized_path);
600
601 let mut best = None;
602 for idx in lower..self.files.len() {
603 let file = &self.files[idx];
604 if file.path_hash != target_hash || self.file_paths[idx].as_slice() != normalized_path {
605 break;
606 }
607 best = Some(idx);
608 }
609 best
610 }
611
612 fn lower_bound_file_key(&self, target_hash: [u8; 8], target_path: &[u8]) -> usize {
613 let mut low = 0usize;
614 let mut high = self.files.len();
615 while low < high {
616 let mid = low + (high - low) / 2;
617 let key_is_less = self.files[mid].path_hash < target_hash
618 || (self.files[mid].path_hash == target_hash
619 && self.file_paths[mid].as_slice() < target_path);
620 if key_is_less {
621 low = mid + 1;
622 } else {
623 high = mid;
624 }
625 }
626 low
627 }
628}
629
630impl IndexShardHeader {
631 pub fn to_bytes(&self) -> [u8; INDEX_SHARD_HEADER_LEN] {
632 let mut bytes = [0u8; INDEX_SHARD_HEADER_LEN];
633 bytes[0..4].copy_from_slice(&TZIS_MAGIC);
634 write_u32(&mut bytes, 4, self.version);
635 write_u64(&mut bytes, 8, self.shard_index);
636 write_u32(&mut bytes, 16, self.file_count);
637 write_u32(&mut bytes, 20, self.frame_count);
638 write_u32(&mut bytes, 24, self.envelope_count);
639 write_u32(&mut bytes, 28, self.file_table_offset);
640 write_u32(&mut bytes, 32, self.frame_table_offset);
641 write_u32(&mut bytes, 36, self.envelope_table_offset);
642 write_u32(&mut bytes, 40, self.string_pool_offset);
643 write_u32(&mut bytes, 44, self.string_pool_size);
644 bytes
645 }
646}
647
648impl FileEntry {
649 pub fn to_bytes(&self) -> [u8; FILE_ENTRY_LEN] {
650 let mut bytes = [0u8; FILE_ENTRY_LEN];
651 bytes[0..8].copy_from_slice(&self.path_hash);
652 write_u32(&mut bytes, 8, self.path_offset);
653 write_u32(&mut bytes, 12, self.path_length);
654 write_u64(&mut bytes, 16, self.first_frame_index);
655 write_u32(&mut bytes, 24, self.frame_count);
656 write_u32(&mut bytes, 28, self.offset_in_first_frame_plaintext);
657 write_u64(&mut bytes, 32, self.tar_member_group_size);
658 write_u64(&mut bytes, 40, self.file_data_size);
659 write_u32(&mut bytes, 48, self.flags);
660 bytes
661 }
662}
663
664impl FrameEntry {
665 pub fn to_bytes(&self) -> [u8; FRAME_ENTRY_LEN] {
666 let mut bytes = [0u8; FRAME_ENTRY_LEN];
667 write_u64(&mut bytes, 0, self.frame_index);
668 write_u64(&mut bytes, 8, self.envelope_index);
669 write_u32(&mut bytes, 16, self.offset_in_envelope);
670 write_u32(&mut bytes, 20, self.compressed_size);
671 write_u32(&mut bytes, 24, self.decompressed_size);
672 write_u32(&mut bytes, 28, self.flags);
673 write_u64(&mut bytes, 32, self.tar_stream_offset);
674 bytes
675 }
676}
677
678impl EnvelopeEntry {
679 pub fn to_bytes(&self) -> [u8; ENVELOPE_ENTRY_LEN] {
680 let mut bytes = [0u8; ENVELOPE_ENTRY_LEN];
681 write_u64(&mut bytes, 0, self.envelope_index);
682 write_u64(&mut bytes, 8, self.first_block_index);
683 write_u32(&mut bytes, 16, self.data_block_count);
684 write_u32(&mut bytes, 20, self.parity_block_count);
685 write_u32(&mut bytes, 24, self.encrypted_size);
686 write_u32(&mut bytes, 28, self.plaintext_size);
687 write_u64(&mut bytes, 32, self.first_frame_index);
688 write_u32(&mut bytes, 40, self.frame_count);
689 bytes
690 }
691}
692
693impl DirectoryHintTable {
694 pub fn parse(
695 bytes: &[u8],
696 locating_shard: &DirectoryHintShardEntry,
697 index_root_shard_count: u32,
698 limits: MetadataLimits,
699 ) -> Result<Self, FormatError> {
700 let structure = "DirectoryHintTable";
701 if bytes.len() < DIRECTORY_HINT_TABLE_LEN {
702 return invalid(structure, "plaintext is shorter than fixed header");
703 }
704 expect_magic(structure, TZDH_MAGIC, read_array::<4>(bytes, 0, structure)?)?;
705 expect_zero(structure, slice(bytes, 56, 16, structure)?)?;
706
707 let header = DirectoryHintTableHeader {
708 version: read_u32(bytes, 4, structure)?,
709 hint_shard_index: read_u64(bytes, 8, structure)?,
710 entry_count: read_u64(bytes, 16, structure)?,
711 entry_table_offset: read_u64(bytes, 24, structure)?,
712 shard_list_offset: read_u64(bytes, 32, structure)?,
713 string_pool_offset: read_u64(bytes, 40, structure)?,
714 string_pool_size: read_u64(bytes, 48, structure)?,
715 };
716 if header.version != 1 {
717 return invalid(structure, "unsupported version");
718 }
719 if header.hint_shard_index != locating_shard.hint_shard_index {
720 return invalid(
721 structure,
722 "hint shard index does not match locating DirectoryHintShardEntry",
723 );
724 }
725 if header.entry_count != locating_shard.entry_count {
726 return invalid(
727 structure,
728 "entry count does not match locating DirectoryHintShardEntry",
729 );
730 }
731 if header.entry_count == 0 {
732 return invalid(structure, "located directory hint shard is empty");
733 }
734 if header.entry_count > limits.max_entries_per_directory_hint_shard {
735 return invalid(structure, "entry count exceeds resource cap");
736 }
737
738 let entry_count = to_usize(header.entry_count, structure)?;
739 expect_offset(
740 structure,
741 "entry table",
742 header.entry_table_offset,
743 DIRECTORY_HINT_TABLE_LEN,
744 )?;
745 let entry_bytes_len = checked_mul(entry_count, DIRECTORY_HINT_ENTRY_LEN, structure)?;
746 let entries_end = checked_add(DIRECTORY_HINT_TABLE_LEN, entry_bytes_len, structure)?;
747 expect_offset(
748 structure,
749 "shard list",
750 header.shard_list_offset,
751 entries_end,
752 )?;
753 if header.shard_list_offset % 4 != 0 {
754 return invalid(structure, "shard list is not 4-byte aligned");
755 }
756
757 let entry_bytes = slice(bytes, DIRECTORY_HINT_TABLE_LEN, entry_bytes_len, structure)?;
758 let entries = parse_directory_hint_entries(entry_bytes)?;
759 let shard_list_len = validate_directory_hint_entries(
760 &entries,
761 bytes,
762 &header,
763 locating_shard,
764 index_root_shard_count,
765 )?;
766 let shard_list_offset = to_usize(header.shard_list_offset, structure)?;
767 let shard_list_bytes_len = checked_mul(shard_list_len, 4, structure)?;
768 let shard_list_end = checked_add(shard_list_offset, shard_list_bytes_len, structure)?;
769 let shard_list_bytes = slice(bytes, shard_list_offset, shard_list_bytes_len, structure)?;
770 let shard_row_indexes = parse_u32_array(shard_list_bytes, structure)?;
771
772 let string_pool = if header.string_pool_size == 0 {
773 if header.string_pool_offset != 0 {
774 return invalid(structure, "absent string pool has non-zero offset");
775 }
776 Vec::new()
777 } else {
778 expect_offset(
779 structure,
780 "string pool",
781 header.string_pool_offset,
782 shard_list_end,
783 )?;
784 let offset = to_usize(header.string_pool_offset, structure)?;
785 let size = to_usize(header.string_pool_size, structure)?;
786 slice(bytes, offset, size, structure)?.to_vec()
787 };
788 let final_cursor = if header.string_pool_size == 0 {
789 shard_list_end
790 } else {
791 checked_add(
792 to_usize(header.string_pool_offset, structure)?,
793 to_usize(header.string_pool_size, structure)?,
794 structure,
795 )?
796 };
797 if bytes.len() != final_cursor {
798 return invalid(
799 structure,
800 "plaintext length does not match canonical cursor",
801 );
802 }
803
804 let entry_paths = validate_directory_hint_paths_and_lists(
805 &entries,
806 &shard_row_indexes,
807 &string_pool,
808 locating_shard,
809 index_root_shard_count,
810 limits.max_path_length,
811 )?;
812
813 Ok(Self {
814 header,
815 entries,
816 shard_row_indexes,
817 string_pool,
818 entry_paths,
819 })
820 }
821
822 pub fn to_bytes(&self) -> Vec<u8> {
823 let mut header = self.header.clone();
824 header.entry_count = self.entries.len() as u64;
825 header.entry_table_offset = if self.entries.is_empty() {
826 0
827 } else {
828 DIRECTORY_HINT_TABLE_LEN as u64
829 };
830 header.shard_list_offset = if self.entries.is_empty() {
831 0
832 } else {
833 (DIRECTORY_HINT_TABLE_LEN + self.entries.len() * DIRECTORY_HINT_ENTRY_LEN) as u64
834 };
835 header.string_pool_size = self.string_pool.len() as u64;
836 header.string_pool_offset = if self.string_pool.is_empty() {
837 0
838 } else {
839 header.shard_list_offset + (self.shard_row_indexes.len() as u64) * 4
840 };
841
842 let mut bytes = Vec::with_capacity(
843 DIRECTORY_HINT_TABLE_LEN
844 + self.entries.len() * DIRECTORY_HINT_ENTRY_LEN
845 + self.shard_row_indexes.len() * 4
846 + self.string_pool.len(),
847 );
848 bytes.extend_from_slice(&header.to_bytes());
849 for entry in &self.entries {
850 bytes.extend_from_slice(&entry.to_bytes());
851 }
852 if !self.entries.is_empty() {
853 for row in &self.shard_row_indexes {
854 let mut raw = [0u8; 4];
855 write_u32(&mut raw, 0, *row);
856 bytes.extend_from_slice(&raw);
857 }
858 }
859 bytes.extend_from_slice(&self.string_pool);
860 bytes
861 }
862
863 pub fn entry_path(&self, entry_index: usize) -> Option<&[u8]> {
864 self.entry_paths.get(entry_index).map(Vec::as_slice)
865 }
866
867 pub fn lookup_directory_index(&self, normalized_dir_path: &[u8]) -> Option<usize> {
868 let target_hash = hash_prefix(normalized_dir_path);
869 let lower = self.lower_bound_directory_key(target_hash, normalized_dir_path);
870 for idx in lower..self.entries.len() {
871 let entry = &self.entries[idx];
872 if entry.dir_hash != target_hash
873 || self.entry_paths[idx].as_slice() != normalized_dir_path
874 {
875 break;
876 }
877 return Some(idx);
878 }
879 None
880 }
881
882 fn lower_bound_directory_key(&self, target_hash: [u8; 8], target_path: &[u8]) -> usize {
883 let mut low = 0usize;
884 let mut high = self.entries.len();
885 while low < high {
886 let mid = low + (high - low) / 2;
887 let key_is_less = self.entries[mid].dir_hash < target_hash
888 || (self.entries[mid].dir_hash == target_hash
889 && self.entry_paths[mid].as_slice() < target_path);
890 if key_is_less {
891 low = mid + 1;
892 } else {
893 high = mid;
894 }
895 }
896 low
897 }
898
899 pub fn shard_rows_for_entry(&self, entry_index: usize) -> Option<&[u32]> {
900 let entry = self.entries.get(entry_index)?;
901 let start = entry.shard_list_start_index as usize;
902 let end = start.checked_add(entry.shard_count as usize)?;
903 self.shard_row_indexes.get(start..end)
904 }
905}
906
907impl DirectoryHintTableHeader {
908 pub fn to_bytes(&self) -> [u8; DIRECTORY_HINT_TABLE_LEN] {
909 let mut bytes = [0u8; DIRECTORY_HINT_TABLE_LEN];
910 bytes[0..4].copy_from_slice(&TZDH_MAGIC);
911 write_u32(&mut bytes, 4, self.version);
912 write_u64(&mut bytes, 8, self.hint_shard_index);
913 write_u64(&mut bytes, 16, self.entry_count);
914 write_u64(&mut bytes, 24, self.entry_table_offset);
915 write_u64(&mut bytes, 32, self.shard_list_offset);
916 write_u64(&mut bytes, 40, self.string_pool_offset);
917 write_u64(&mut bytes, 48, self.string_pool_size);
918 bytes
919 }
920}
921
922impl DirectoryHintEntry {
923 pub fn to_bytes(&self) -> [u8; DIRECTORY_HINT_ENTRY_LEN] {
924 let mut bytes = [0u8; DIRECTORY_HINT_ENTRY_LEN];
925 bytes[0..8].copy_from_slice(&self.dir_hash);
926 write_u64(&mut bytes, 8, self.path_offset);
927 write_u32(&mut bytes, 16, self.path_length);
928 write_u32(&mut bytes, 24, self.shard_list_start_index);
929 write_u32(&mut bytes, 28, self.shard_count);
930 bytes
931 }
932}
933
934pub fn hash_prefix(bytes: &[u8]) -> [u8; 8] {
935 let digest = Sha256::digest(bytes);
936 let mut out = [0u8; 8];
937 out.copy_from_slice(&digest[..8]);
938 out
939}
940
941pub fn normalize_lookup_file_path(
942 path: &str,
943 max_path_length: u32,
944) -> Result<Vec<u8>, FormatError> {
945 let normalized = path.nfc().collect::<String>();
946 validate_file_path_bytes(normalized.as_bytes(), max_path_length)?;
947 Ok(normalized.into_bytes())
948}
949
950pub fn normalize_lookup_directory_path(
951 path: &str,
952 max_path_length: u32,
953) -> Result<Vec<u8>, FormatError> {
954 let trimmed = path.strip_suffix('/').unwrap_or(path);
955 let normalized = trimmed.nfc().collect::<String>();
956 validate_directory_path_bytes(normalized.as_bytes(), max_path_length)?;
957 Ok(normalized.into_bytes())
958}
959
/// Reports whether `directory_path` is an ancestor of `file_path`.
///
/// The empty directory path is an ancestor of every path. Otherwise
/// `file_path` must start with `directory_path` immediately followed by `/`.
pub fn is_directory_ancestor(directory_path: &[u8], file_path: &[u8]) -> bool {
    if directory_path.is_empty() {
        return true;
    }
    match file_path.strip_prefix(directory_path) {
        // The remainder must be non-empty and begin with the separator.
        Some(remainder) => remainder.first() == Some(&b'/'),
        None => false,
    }
}
968
969fn parse_shard_entries(
970 bytes: &[u8],
971 limits: MetadataLimits,
972) -> Result<Vec<ShardEntry>, FormatError> {
973 let mut entries = Vec::with_capacity(bytes.len() / SHARD_ENTRY_LEN);
974 let mut seen_indexes = HashSet::new();
975 for chunk in bytes.chunks_exact(SHARD_ENTRY_LEN) {
976 let entry = ShardEntry {
977 shard_index: read_u64(chunk, 0, "ShardEntry")?,
978 first_block_index: read_u64(chunk, 8, "ShardEntry")?,
979 data_block_count: read_u32(chunk, 16, "ShardEntry")?,
980 parity_block_count: read_u32(chunk, 20, "ShardEntry")?,
981 encrypted_size: read_u32(chunk, 24, "ShardEntry")?,
982 decompressed_size: read_u32(chunk, 28, "ShardEntry")?,
983 file_count: read_u32(chunk, 32, "ShardEntry")?,
984 first_path_hash: read_array::<8>(chunk, 36, "ShardEntry")?,
985 last_path_hash: read_array::<8>(chunk, 44, "ShardEntry")?,
986 };
987 if entry.file_count == 0 {
988 return invalid("ShardEntry", "file count is zero");
989 }
990 if entry.decompressed_size == 0 {
991 return invalid("ShardEntry", "decompressed size is zero");
992 }
993 validate_encrypted_extent(
994 "ShardEntry",
995 entry.data_block_count,
996 entry.encrypted_size,
997 limits.block_size,
998 )?;
999 validate_fec_class_extent(
1000 "ShardEntry",
1001 entry.data_block_count,
1002 entry.parity_block_count,
1003 limits.max_index_data_shards,
1004 limits.max_index_parity_shards,
1005 )?;
1006 if entry.first_path_hash > entry.last_path_hash {
1007 return invalid("ShardEntry", "first hash is greater than last hash");
1008 }
1009 if !seen_indexes.insert(entry.shard_index) {
1010 return invalid("ShardEntry", "duplicate shard index");
1011 }
1012 if let Some(previous) = entries.last() {
1013 let previous: &ShardEntry = previous;
1014 if shard_entry_sort_key(previous) >= shard_entry_sort_key(&entry) {
1015 return invalid("IndexRoot", "ShardEntry rows are not sorted");
1016 }
1017 if previous.last_path_hash > entry.first_path_hash {
1018 return invalid("IndexRoot", "ShardEntry hash ranges overlap out of order");
1019 }
1020 }
1021 entries.push(entry);
1022 }
1023 Ok(entries)
1024}
1025
1026fn parse_directory_hint_shard_entries(
1027 bytes: &[u8],
1028 limits: MetadataLimits,
1029) -> Result<Vec<DirectoryHintShardEntry>, FormatError> {
1030 let mut entries = Vec::with_capacity(bytes.len() / DIRECTORY_HINT_SHARD_ENTRY_LEN);
1031 let mut seen_indexes = HashSet::new();
1032 for chunk in bytes.chunks_exact(DIRECTORY_HINT_SHARD_ENTRY_LEN) {
1033 let entry = DirectoryHintShardEntry {
1034 hint_shard_index: read_u64(chunk, 0, "DirectoryHintShardEntry")?,
1035 first_dir_hash: read_array::<8>(chunk, 8, "DirectoryHintShardEntry")?,
1036 last_dir_hash: read_array::<8>(chunk, 16, "DirectoryHintShardEntry")?,
1037 first_block_index: read_u64(chunk, 24, "DirectoryHintShardEntry")?,
1038 data_block_count: read_u32(chunk, 32, "DirectoryHintShardEntry")?,
1039 parity_block_count: read_u32(chunk, 36, "DirectoryHintShardEntry")?,
1040 encrypted_size: read_u32(chunk, 40, "DirectoryHintShardEntry")?,
1041 decompressed_size: read_u32(chunk, 44, "DirectoryHintShardEntry")?,
1042 entry_count: read_u64(chunk, 48, "DirectoryHintShardEntry")?,
1043 };
1044 if entry.entry_count == 0 {
1045 return invalid("DirectoryHintShardEntry", "entry count is zero");
1046 }
1047 if entry.decompressed_size == 0 {
1048 return invalid("DirectoryHintShardEntry", "decompressed size is zero");
1049 }
1050 validate_encrypted_extent(
1051 "DirectoryHintShardEntry",
1052 entry.data_block_count,
1053 entry.encrypted_size,
1054 limits.block_size,
1055 )?;
1056 validate_fec_class_extent(
1057 "DirectoryHintShardEntry",
1058 entry.data_block_count,
1059 entry.parity_block_count,
1060 limits.max_index_data_shards,
1061 limits.max_index_parity_shards,
1062 )?;
1063 if entry.first_dir_hash > entry.last_dir_hash {
1064 return invalid(
1065 "DirectoryHintShardEntry",
1066 "first hash is greater than last hash",
1067 );
1068 }
1069 if !seen_indexes.insert(entry.hint_shard_index) {
1070 return invalid("DirectoryHintShardEntry", "duplicate hint shard index");
1071 }
1072 if let Some(previous) = entries.last() {
1073 let previous: &DirectoryHintShardEntry = previous;
1074 if directory_hint_shard_sort_key(previous) >= directory_hint_shard_sort_key(&entry) {
1075 return invalid("IndexRoot", "DirectoryHintShardEntry rows are not sorted");
1076 }
1077 if previous.last_dir_hash > entry.first_dir_hash {
1078 return invalid(
1079 "IndexRoot",
1080 "DirectoryHintShardEntry hash ranges overlap out of order",
1081 );
1082 }
1083 }
1084 entries.push(entry);
1085 }
1086 Ok(entries)
1087}
1088
1089fn parse_file_entry(bytes: &[u8]) -> Result<FileEntry, FormatError> {
1090 expect_zero("FileEntry", slice(bytes, 52, 4, "FileEntry")?)?;
1091 Ok(FileEntry {
1092 path_hash: read_array::<8>(bytes, 0, "FileEntry")?,
1093 path_offset: read_u32(bytes, 8, "FileEntry")?,
1094 path_length: read_u32(bytes, 12, "FileEntry")?,
1095 first_frame_index: read_u64(bytes, 16, "FileEntry")?,
1096 frame_count: read_u32(bytes, 24, "FileEntry")?,
1097 offset_in_first_frame_plaintext: read_u32(bytes, 28, "FileEntry")?,
1098 tar_member_group_size: read_u64(bytes, 32, "FileEntry")?,
1099 file_data_size: read_u64(bytes, 40, "FileEntry")?,
1100 flags: read_u32(bytes, 48, "FileEntry")?,
1101 })
1102}
1103
1104fn parse_frame_entry(bytes: &[u8]) -> Result<FrameEntry, FormatError> {
1105 expect_zero("FrameEntry", slice(bytes, 40, 4, "FrameEntry")?)?;
1106 Ok(FrameEntry {
1107 frame_index: read_u64(bytes, 0, "FrameEntry")?,
1108 envelope_index: read_u64(bytes, 8, "FrameEntry")?,
1109 offset_in_envelope: read_u32(bytes, 16, "FrameEntry")?,
1110 compressed_size: read_u32(bytes, 20, "FrameEntry")?,
1111 decompressed_size: read_u32(bytes, 24, "FrameEntry")?,
1112 flags: read_u32(bytes, 28, "FrameEntry")?,
1113 tar_stream_offset: read_u64(bytes, 32, "FrameEntry")?,
1114 })
1115}
1116
1117fn parse_envelope_entry(bytes: &[u8]) -> Result<EnvelopeEntry, FormatError> {
1118 expect_zero("EnvelopeEntry", slice(bytes, 44, 4, "EnvelopeEntry")?)?;
1119 Ok(EnvelopeEntry {
1120 envelope_index: read_u64(bytes, 0, "EnvelopeEntry")?,
1121 first_block_index: read_u64(bytes, 8, "EnvelopeEntry")?,
1122 data_block_count: read_u32(bytes, 16, "EnvelopeEntry")?,
1123 parity_block_count: read_u32(bytes, 20, "EnvelopeEntry")?,
1124 encrypted_size: read_u32(bytes, 24, "EnvelopeEntry")?,
1125 plaintext_size: read_u32(bytes, 28, "EnvelopeEntry")?,
1126 first_frame_index: read_u64(bytes, 32, "EnvelopeEntry")?,
1127 frame_count: read_u32(bytes, 40, "EnvelopeEntry")?,
1128 })
1129}
1130
1131fn parse_directory_hint_entries(bytes: &[u8]) -> Result<Vec<DirectoryHintEntry>, FormatError> {
1132 let mut entries = Vec::with_capacity(bytes.len() / DIRECTORY_HINT_ENTRY_LEN);
1133 for chunk in bytes.chunks_exact(DIRECTORY_HINT_ENTRY_LEN) {
1134 expect_zero(
1135 "DirectoryHintEntry",
1136 slice(chunk, 20, 4, "DirectoryHintEntry")?,
1137 )?;
1138 expect_zero(
1139 "DirectoryHintEntry",
1140 slice(chunk, 32, 8, "DirectoryHintEntry")?,
1141 )?;
1142 entries.push(DirectoryHintEntry {
1143 dir_hash: read_array::<8>(chunk, 0, "DirectoryHintEntry")?,
1144 path_offset: read_u64(chunk, 8, "DirectoryHintEntry")?,
1145 path_length: read_u32(chunk, 16, "DirectoryHintEntry")?,
1146 shard_list_start_index: read_u32(chunk, 24, "DirectoryHintEntry")?,
1147 shard_count: read_u32(chunk, 28, "DirectoryHintEntry")?,
1148 });
1149 }
1150 Ok(entries)
1151}
1152
1153fn validate_index_root_totals(
1154 header: &IndexRootHeader,
1155 shards: &[ShardEntry],
1156 has_dictionary: bool,
1157) -> Result<(), FormatError> {
1158 if shards.is_empty() {
1159 if header.file_count != 0
1160 || header.frame_count != 0
1161 || header.envelope_count != 0
1162 || header.payload_block_count != 0
1163 || header.tar_total_size != 0
1164 {
1165 return invalid(
1166 "IndexRoot",
1167 "empty shard table has non-empty archive totals",
1168 );
1169 }
1170 if header.content_sha256 != SHA256_EMPTY {
1171 return invalid(
1172 "IndexRoot",
1173 "empty archive content hash is not SHA-256(empty)",
1174 );
1175 }
1176 if has_dictionary || !index_root_dictionary_fields_are_zero(header) {
1177 return invalid("IndexRoot", "empty archive cannot use dictionary");
1178 }
1179 return Ok(());
1180 }
1181
1182 let mut sum = 0u64;
1183 for shard in shards {
1184 sum = sum.checked_add(shard.file_count as u64).ok_or(
1185 FormatError::MetadataArithmeticOverflow {
1186 structure: "IndexRoot",
1187 },
1188 )?;
1189 }
1190 if sum != header.file_count {
1191 return invalid(
1192 "IndexRoot",
1193 "file_count does not equal sum of ShardEntry rows",
1194 );
1195 }
1196 Ok(())
1197}
1198
1199fn validate_dictionary_fields(
1200 header: &IndexRootHeader,
1201 has_dictionary: bool,
1202 limits: MetadataLimits,
1203) -> Result<(), FormatError> {
1204 if !has_dictionary {
1205 if !index_root_dictionary_fields_are_zero(header) {
1206 return invalid(
1207 "IndexRoot",
1208 "dictionary fields are non-zero while has_dictionary is false",
1209 );
1210 }
1211 return Ok(());
1212 }
1213
1214 if header.dictionary_data_block_count == 0 {
1215 return invalid(
1216 "IndexRoot",
1217 "dictionary data block count is zero while has_dictionary is true",
1218 );
1219 }
1220 if header.dictionary_first_block == 0
1221 || header.dictionary_encrypted_size == 0
1222 || header.dictionary_decompressed_size == 0
1223 {
1224 return invalid("IndexRoot", "required dictionary field is zero");
1225 }
1226 validate_encrypted_extent(
1227 "IndexRoot.dictionary",
1228 header.dictionary_data_block_count,
1229 header.dictionary_encrypted_size,
1230 limits.block_size,
1231 )?;
1232 validate_fec_class_extent(
1233 "IndexRoot.dictionary",
1234 header.dictionary_data_block_count,
1235 header.dictionary_parity_block_count,
1236 limits.max_index_root_data_shards,
1237 limits.max_index_root_parity_shards,
1238 )
1239}
1240
1241fn index_root_dictionary_fields_are_zero(header: &IndexRootHeader) -> bool {
1242 header.dictionary_first_block == 0
1243 && header.dictionary_data_block_count == 0
1244 && header.dictionary_parity_block_count == 0
1245 && header.dictionary_encrypted_size == 0
1246 && header.dictionary_decompressed_size == 0
1247}
1248
/// Cross-validates the file, frame and envelope tables of one index shard.
///
/// Besides per-row checks, this requires that the frame table is exactly the
/// set of frames referenced by the file rows, that the envelope table is
/// exactly the set of envelopes referenced by the frame rows, and that the
/// first and last file hashes equal the bounds in `locating_shard`.
///
/// On success returns, in file-row order, each file's path bytes copied out of
/// `string_pool` and each file's start position, computed as the first frame's
/// `tar_stream_offset` plus `offset_in_first_frame_plaintext`.
///
/// # Errors
///
/// Returns the first validation failure; the order of the checks below decides
/// which error is reported when several rules are violated.
fn validate_index_shard_tables(
    files: &[FileEntry],
    frames: &[FrameEntry],
    envelopes: &[EnvelopeEntry],
    string_pool: &[u8],
    locating_shard: &ShardEntry,
    limits: MetadataLimits,
) -> Result<(Vec<Vec<u8>>, Vec<u64>), FormatError> {
    // Table-local invariants are checked before any cross-referencing.
    validate_frame_table(frames)?;
    validate_envelope_table(envelopes, limits)?;

    // Lookup maps from a row's declared index to its position in the table.
    let frame_by_index = frames
        .iter()
        .enumerate()
        .map(|(idx, frame)| (frame.frame_index, idx))
        .collect::<HashMap<_, _>>();
    let envelope_by_index = envelopes
        .iter()
        .enumerate()
        .map(|(idx, envelope)| (envelope.envelope_index, idx))
        .collect::<HashMap<_, _>>();

    let mut paths = Vec::with_capacity(files.len());
    let mut starts = Vec::with_capacity(files.len());
    // Every frame index that some file row claims to use.
    let mut required_frames = BTreeSet::new();

    for file in files {
        if file.flags != 0 {
            return invalid("FileEntry", "reserved flags are non-zero");
        }
        if file.path_length == 0 {
            return invalid("FileEntry", "path length is zero");
        }
        if file.path_length > limits.max_path_length {
            return invalid("FileEntry", "path length exceeds configured maximum");
        }
        if file.frame_count == 0 {
            return invalid("FileEntry", "frame count is zero");
        }
        if file.tar_member_group_size < 512 {
            return invalid(
                "FileEntry",
                "tar member group is smaller than one tar record",
            );
        }
        if file.path_hash < locating_shard.first_path_hash
            || file.path_hash > locating_shard.last_path_hash
        {
            return invalid(
                "FileEntry",
                "path hash is outside locating ShardEntry bounds",
            );
        }

        // The stored hash must be the hash prefix of the path actually stored.
        let path = string_slice(
            string_pool,
            file.path_offset as u64,
            file.path_length as u64,
            "FileEntry",
        )?;
        validate_file_path_bytes(path, limits.max_path_length)?;
        if hash_prefix(path) != file.path_hash {
            return invalid("FileEntry", "path hash does not match string-pool path");
        }

        let first_frame = frame_for_file(file, &frame_by_index, frames, file.first_frame_index)?;
        let tar_member_group_start = first_frame
            .tar_stream_offset
            .checked_add(file.offset_in_first_frame_plaintext as u64)
            .ok_or(FormatError::MetadataArithmeticOverflow {
                structure: "FileEntry",
            })?;
        // This looks up every frame in the file's range, so the loop below
        // only inserts indexes that are present in `frames`.
        validate_file_frame_range(file, frames, &frame_by_index)?;
        for offset in 0..file.frame_count as u64 {
            let index = file.first_frame_index.checked_add(offset).ok_or(
                FormatError::MetadataArithmeticOverflow {
                    structure: "FileEntry",
                },
            )?;
            required_frames.insert(index);
        }
        paths.push(path.to_vec());
        starts.push(tar_member_group_start);
    }

    validate_file_order(files, &paths, &starts)?;
    // The frame table may contain neither fewer nor more rows than the files
    // reference.
    if required_frames.len() != frames.len()
        || frames
            .iter()
            .any(|frame| !required_frames.contains(&frame.frame_index))
    {
        return invalid(
            "IndexShard",
            "FrameEntry table is not the exact set referenced by FileEntry rows",
        );
    }

    // Same exact-set rule, one level down: frames -> envelopes.
    let mut required_envelopes = BTreeSet::new();
    for frame in frames {
        let envelope = envelope_by_index
            .get(&frame.envelope_index)
            .and_then(|idx| envelopes.get(*idx))
            .ok_or_else(|| FormatError::InvalidMetadata {
                structure: "FrameEntry",
                reason: "referenced EnvelopeEntry is missing",
            })?;
        validate_frame_envelope_binding(frame, envelope)?;
        required_envelopes.insert(frame.envelope_index);
    }
    if required_envelopes.len() != envelopes.len()
        || envelopes
            .iter()
            .any(|entry| !required_envelopes.contains(&entry.envelope_index))
    {
        return invalid(
            "IndexShard",
            "EnvelopeEntry table is not the exact set referenced by FrameEntry rows",
        );
    }
    validate_frame_slices_by_envelope(frames, envelopes)?;

    // The locating row's hash bounds must be tight, not merely enclosing.
    if let Some(first) = files.first() {
        if first.path_hash != locating_shard.first_path_hash {
            return invalid(
                "IndexShard",
                "first FileEntry hash does not match ShardEntry",
            );
        }
    }
    if let Some(last) = files.last() {
        if last.path_hash != locating_shard.last_path_hash {
            return invalid(
                "IndexShard",
                "last FileEntry hash does not match ShardEntry",
            );
        }
    }

    Ok((paths, starts))
}
1389
1390fn validate_frame_table(frames: &[FrameEntry]) -> Result<(), FormatError> {
1391 for frame in frames {
1392 if frame.compressed_size == 0 || frame.decompressed_size == 0 {
1393 return invalid("FrameEntry", "frame sizes must be non-zero");
1394 }
1395 if frame.flags & !FRAME_KNOWN_FLAGS != 0 {
1396 return invalid("FrameEntry", "reserved flag bits are non-zero");
1397 }
1398 }
1399 for pair in frames.windows(2) {
1400 let previous = &pair[0];
1401 let next = &pair[1];
1402 if previous.frame_index >= next.frame_index {
1403 return invalid("IndexShard", "FrameEntry rows are not sorted and unique");
1404 }
1405 let previous_end = previous
1406 .tar_stream_offset
1407 .checked_add(previous.decompressed_size as u64)
1408 .ok_or(FormatError::MetadataArithmeticOverflow {
1409 structure: "FrameEntry",
1410 })?;
1411 if next.frame_index == previous.frame_index + 1 {
1412 if next.tar_stream_offset != previous_end {
1413 return invalid(
1414 "FrameEntry",
1415 "consecutive tar stream offsets are not packed",
1416 );
1417 }
1418 } else if next.tar_stream_offset <= previous_end {
1419 return invalid("FrameEntry", "non-consecutive tar stream offsets overlap");
1420 }
1421 }
1422 Ok(())
1423}
1424
1425fn validate_envelope_table(
1426 envelopes: &[EnvelopeEntry],
1427 limits: MetadataLimits,
1428) -> Result<(), FormatError> {
1429 for envelope in envelopes {
1430 if envelope.frame_count == 0 || envelope.plaintext_size == 0 {
1431 return invalid("EnvelopeEntry", "payload envelope has no frame plaintext");
1432 }
1433 validate_encrypted_extent(
1434 "EnvelopeEntry",
1435 envelope.data_block_count,
1436 envelope.encrypted_size,
1437 limits.block_size,
1438 )?;
1439 validate_fec_class_extent(
1440 "EnvelopeEntry",
1441 envelope.data_block_count,
1442 envelope.parity_block_count,
1443 limits.max_payload_data_shards,
1444 limits.max_payload_parity_shards,
1445 )?;
1446 }
1447 for pair in envelopes.windows(2) {
1448 if pair[0].envelope_index >= pair[1].envelope_index {
1449 return invalid("IndexShard", "EnvelopeEntry rows are not sorted and unique");
1450 }
1451 }
1452 Ok(())
1453}
1454
1455fn validate_file_order(
1456 files: &[FileEntry],
1457 paths: &[Vec<u8>],
1458 starts: &[u64],
1459) -> Result<(), FormatError> {
1460 for idx in 1..files.len() {
1461 let previous_key = (
1462 &files[idx - 1].path_hash,
1463 paths[idx - 1].as_slice(),
1464 starts[idx - 1],
1465 );
1466 let current_key = (&files[idx].path_hash, paths[idx].as_slice(), starts[idx]);
1467 if previous_key >= current_key {
1468 return invalid("IndexShard", "FileEntry rows are not sorted and unique");
1469 }
1470 }
1471 Ok(())
1472}
1473
1474fn validate_file_frame_range(
1475 file: &FileEntry,
1476 frames: &[FrameEntry],
1477 frame_by_index: &HashMap<u64, usize>,
1478) -> Result<(), FormatError> {
1479 let first = frame_for_file(file, frame_by_index, frames, file.first_frame_index)?;
1480 if file.offset_in_first_frame_plaintext >= first.decompressed_size {
1481 return invalid(
1482 "FileEntry",
1483 "offset in first frame is outside the first referenced frame",
1484 );
1485 }
1486
1487 let mut bytes_before_last =
1488 first.decompressed_size as u64 - file.offset_in_first_frame_plaintext as u64;
1489 if file.frame_count == 1 {
1490 if file.tar_member_group_size > bytes_before_last {
1491 return invalid(
1492 "FileEntry",
1493 "tar member group exceeds the single referenced frame",
1494 );
1495 }
1496 return Ok(());
1497 }
1498
1499 for offset in 1..(file.frame_count as u64 - 1) {
1500 let frame_index = file.first_frame_index.checked_add(offset).ok_or(
1501 FormatError::MetadataArithmeticOverflow {
1502 structure: "FileEntry",
1503 },
1504 )?;
1505 let frame = frame_for_file(file, frame_by_index, frames, frame_index)?;
1506 bytes_before_last = bytes_before_last
1507 .checked_add(frame.decompressed_size as u64)
1508 .ok_or(FormatError::MetadataArithmeticOverflow {
1509 structure: "FileEntry",
1510 })?;
1511 }
1512
1513 let last_index = file
1514 .first_frame_index
1515 .checked_add(file.frame_count as u64 - 1)
1516 .ok_or(FormatError::MetadataArithmeticOverflow {
1517 structure: "FileEntry",
1518 })?;
1519 let last = frame_for_file(file, frame_by_index, frames, last_index)?;
1520 let max_size = bytes_before_last
1521 .checked_add(last.decompressed_size as u64)
1522 .ok_or(FormatError::MetadataArithmeticOverflow {
1523 structure: "FileEntry",
1524 })?;
1525 if file.tar_member_group_size <= bytes_before_last || file.tar_member_group_size > max_size {
1526 return invalid("FileEntry", "frame range is not minimal");
1527 }
1528 Ok(())
1529}
1530
1531fn validate_frame_envelope_binding(
1532 frame: &FrameEntry,
1533 envelope: &EnvelopeEntry,
1534) -> Result<(), FormatError> {
1535 let envelope_frame_end = envelope
1536 .first_frame_index
1537 .checked_add(envelope.frame_count as u64)
1538 .ok_or(FormatError::MetadataArithmeticOverflow {
1539 structure: "EnvelopeEntry",
1540 })?;
1541 if frame.frame_index < envelope.first_frame_index || frame.frame_index >= envelope_frame_end {
1542 return invalid("FrameEntry", "frame index is outside envelope frame range");
1543 }
1544 let end = frame
1545 .offset_in_envelope
1546 .checked_add(frame.compressed_size)
1547 .ok_or(FormatError::MetadataArithmeticOverflow {
1548 structure: "FrameEntry",
1549 })?;
1550 if end > envelope.plaintext_size {
1551 return invalid("FrameEntry", "frame slice exceeds envelope plaintext");
1552 }
1553 Ok(())
1554}
1555
1556fn validate_frame_slices_by_envelope(
1557 frames: &[FrameEntry],
1558 envelopes: &[EnvelopeEntry],
1559) -> Result<(), FormatError> {
1560 for envelope in envelopes {
1561 let mut slices = frames
1562 .iter()
1563 .filter(|frame| frame.envelope_index == envelope.envelope_index)
1564 .map(|frame| {
1565 let end = frame
1566 .offset_in_envelope
1567 .checked_add(frame.compressed_size)
1568 .ok_or(FormatError::MetadataArithmeticOverflow {
1569 structure: "FrameEntry",
1570 })?;
1571 Ok((frame.offset_in_envelope, end, frame.frame_index))
1572 })
1573 .collect::<Result<Vec<_>, FormatError>>()?;
1574 slices.sort_unstable_by_key(|slice| (slice.0, slice.2));
1575 for pair in slices.windows(2) {
1576 if pair[0].1 > pair[1].0 {
1577 return invalid("FrameEntry", "frame slices overlap inside an envelope");
1578 }
1579 }
1580
1581 let contains_complete_global_range = (0..envelope.frame_count as u64).all(|offset| {
1582 envelope
1583 .first_frame_index
1584 .checked_add(offset)
1585 .map(|index| slices.iter().any(|slice| slice.2 == index))
1586 .unwrap_or(false)
1587 });
1588 if contains_complete_global_range {
1589 let mut cursor = 0u32;
1590 for (start, end, _) in slices {
1591 if start != cursor {
1592 return invalid("EnvelopeEntry", "complete local envelope has frame gap");
1593 }
1594 cursor = end;
1595 }
1596 if cursor != envelope.plaintext_size {
1597 return invalid(
1598 "EnvelopeEntry",
1599 "complete local envelope does not cover plaintext",
1600 );
1601 }
1602 }
1603 }
1604 Ok(())
1605}
1606
1607fn validate_directory_hint_entries(
1608 entries: &[DirectoryHintEntry],
1609 bytes: &[u8],
1610 header: &DirectoryHintTableHeader,
1611 locating_shard: &DirectoryHintShardEntry,
1612 index_root_shard_count: u32,
1613) -> Result<usize, FormatError> {
1614 let structure = "DirectoryHintTable";
1615 if index_root_shard_count == 0 {
1616 return invalid(structure, "directory hints require IndexRoot shard rows");
1617 }
1618 if entries.is_empty() {
1619 return invalid(structure, "located directory hint table is empty");
1620 }
1621 if entries[0].dir_hash != locating_shard.first_dir_hash {
1622 return invalid(
1623 structure,
1624 "first DirectoryHintEntry hash does not match locating row",
1625 );
1626 }
1627 if entries[entries.len() - 1].dir_hash != locating_shard.last_dir_hash {
1628 return invalid(
1629 structure,
1630 "last DirectoryHintEntry hash does not match locating row",
1631 );
1632 }
1633
1634 let mut max_shard_list_end = 0usize;
1635 for entry in entries {
1636 if entry.shard_count == 0 {
1637 return invalid("DirectoryHintEntry", "shard count is zero");
1638 }
1639 let start = entry.shard_list_start_index as usize;
1640 let end = start.checked_add(entry.shard_count as usize).ok_or(
1641 FormatError::MetadataArithmeticOverflow {
1642 structure: "DirectoryHintEntry",
1643 },
1644 )?;
1645 max_shard_list_end = max_shard_list_end.max(end);
1646 }
1647 let byte_len = checked_mul(max_shard_list_end, 4, structure)?;
1648 let shard_list_offset = to_usize(header.shard_list_offset, structure)?;
1649 let shard_list_end = checked_add(shard_list_offset, byte_len, structure)?;
1650 if shard_list_end > bytes.len() {
1651 return invalid(structure, "shard list exceeds plaintext");
1652 }
1653 Ok(max_shard_list_end)
1654}
1655
1656fn validate_directory_hint_paths_and_lists(
1657 entries: &[DirectoryHintEntry],
1658 shard_row_indexes: &[u32],
1659 string_pool: &[u8],
1660 locating_shard: &DirectoryHintShardEntry,
1661 index_root_shard_count: u32,
1662 max_path_length: u32,
1663) -> Result<Vec<Vec<u8>>, FormatError> {
1664 let mut paths = Vec::with_capacity(entries.len());
1665 let mut seen_paths = HashSet::new();
1666 for entry in entries {
1667 let path = if entry.path_length == 0 {
1668 if entry.path_offset != 0 || entry.dir_hash != hash_prefix(b"") {
1669 return invalid(
1670 "DirectoryHintEntry",
1671 "root directory entry is not canonical",
1672 );
1673 }
1674 &[][..]
1675 } else {
1676 let path = string_slice(
1677 string_pool,
1678 entry.path_offset,
1679 entry.path_length as u64,
1680 "DirectoryHintEntry",
1681 )?;
1682 validate_directory_path_bytes(path, max_path_length)?;
1683 path
1684 };
1685 if hash_prefix(path) != entry.dir_hash {
1686 return invalid(
1687 "DirectoryHintEntry",
1688 "dir_hash does not match string-pool path",
1689 );
1690 }
1691 if !seen_paths.insert(path.to_vec()) {
1692 return invalid("DirectoryHintEntry", "duplicate directory path");
1693 }
1694
1695 let start = entry.shard_list_start_index as usize;
1696 let end = start.checked_add(entry.shard_count as usize).ok_or(
1697 FormatError::MetadataArithmeticOverflow {
1698 structure: "DirectoryHintEntry",
1699 },
1700 )?;
1701 let rows = shard_row_indexes
1702 .get(start..end)
1703 .ok_or(FormatError::InvalidMetadata {
1704 structure: "DirectoryHintEntry",
1705 reason: "shard-row-index range is out of bounds",
1706 })?;
1707 for pair in rows.windows(2) {
1708 if pair[0] >= pair[1] {
1709 return invalid(
1710 "DirectoryHintEntry",
1711 "shard-row-index list is not sorted and unique",
1712 );
1713 }
1714 }
1715 if rows.iter().any(|row| *row >= index_root_shard_count) {
1716 return invalid(
1717 "DirectoryHintEntry",
1718 "shard-row-index is outside IndexRoot shard table",
1719 );
1720 }
1721 paths.push(path.to_vec());
1722 }
1723
1724 for idx in 1..entries.len() {
1725 let previous_key = (&entries[idx - 1].dir_hash, paths[idx - 1].as_slice());
1726 let current_key = (&entries[idx].dir_hash, paths[idx].as_slice());
1727 if previous_key >= current_key {
1728 return invalid(
1729 "DirectoryHintTable",
1730 "DirectoryHintEntry rows are not sorted and unique",
1731 );
1732 }
1733 }
1734 if entries[0].dir_hash != locating_shard.first_dir_hash
1735 || entries[entries.len() - 1].dir_hash != locating_shard.last_dir_hash
1736 {
1737 return invalid(
1738 "DirectoryHintTable",
1739 "entry hash bounds do not match locating shard",
1740 );
1741 }
1742
1743 Ok(paths)
1744}
1745
1746fn candidate_interval_indexes<T>(
1747 entries: &[T],
1748 target_hash: [u8; 8],
1749 scan_cap_per_direction: usize,
1750 first_hash: impl Fn(&T) -> [u8; 8],
1751 last_hash: impl Fn(&T) -> [u8; 8],
1752) -> Result<Vec<usize>, FormatError> {
1753 if entries.is_empty() {
1754 return Ok(Vec::new());
1755 }
1756 let upper = entries.partition_point(|entry| first_hash(entry) <= target_hash);
1757 if upper == 0 {
1758 return Ok(Vec::new());
1759 }
1760 let landing = upper - 1;
1761 if last_hash(&entries[landing]) < target_hash {
1762 return Ok(Vec::new());
1763 }
1764
1765 let mut start = landing;
1766 let mut left_scanned = 0usize;
1767 while start > 0
1768 && first_hash(&entries[start - 1]) <= target_hash
1769 && last_hash(&entries[start - 1]) >= target_hash
1770 {
1771 left_scanned += 1;
1772 if left_scanned > scan_cap_per_direction {
1773 return Err(FormatError::HashPrefixCollisionRunExceeded);
1774 }
1775 start -= 1;
1776 }
1777
1778 let mut end = landing + 1;
1779 let mut right_scanned = 0usize;
1780 while end < entries.len()
1781 && first_hash(&entries[end]) <= target_hash
1782 && last_hash(&entries[end]) >= target_hash
1783 {
1784 right_scanned += 1;
1785 if right_scanned > scan_cap_per_direction {
1786 return Err(FormatError::HashPrefixCollisionRunExceeded);
1787 }
1788 end += 1;
1789 }
1790
1791 Ok((start..end).collect())
1792}
1793
1794pub fn validate_file_path_bytes(path: &[u8], max_path_length: u32) -> Result<(), FormatError> {
1795 if path.is_empty() || path.len() > max_path_length as usize {
1796 return Err(FormatError::UnsafeArchivePath);
1797 }
1798 validate_relative_path(path, false)
1799}
1800
1801pub fn validate_directory_path_bytes(path: &[u8], max_path_length: u32) -> Result<(), FormatError> {
1802 if path.len() > max_path_length as usize {
1803 return Err(FormatError::UnsafeArchivePath);
1804 }
1805 validate_relative_path(path, true)
1806}
1807
1808fn validate_relative_path(path: &[u8], allow_empty_root: bool) -> Result<(), FormatError> {
1809 if path.is_empty() {
1810 return if allow_empty_root {
1811 Ok(())
1812 } else {
1813 Err(FormatError::UnsafeArchivePath)
1814 };
1815 }
1816 if path.contains(&0) || path.contains(&b'\\') || path.contains(&b':') || path[0] == b'/' {
1817 return Err(FormatError::UnsafeArchivePath);
1818 }
1819 let path_str = std::str::from_utf8(path).map_err(|_| FormatError::UnsafeArchivePath)?;
1820 if !path_str.nfc().eq(path_str.chars()) {
1821 return Err(FormatError::UnsafeArchivePath);
1822 }
1823 for component in path_str.split('/') {
1824 if component.is_empty() || component == "." || component == ".." {
1825 return Err(FormatError::UnsafeArchivePath);
1826 }
1827 if is_windows_device_component(component) {
1828 return Err(FormatError::UnsafeArchivePath);
1829 }
1830 }
1831 Ok(())
1832}
1833
/// Reports whether a path component names a reserved Windows device.
///
/// Only the text before the first `.` is considered, with trailing spaces
/// removed, and ASCII letter case is ignored. The reserved names are `CON`,
/// `PRN`, `AUX`, `NUL`, `CLOCK$`, and `COM`/`LPT` followed by one of the
/// digits 1-9 or a superscript 1, 2 or 3.
fn is_windows_device_component(component: &str) -> bool {
    const DEVICE_NAMES: [&str; 29] = [
        "CON",
        "PRN",
        "AUX",
        "NUL",
        "CLOCK$",
        "COM1",
        "COM2",
        "COM3",
        "COM4",
        "COM5",
        "COM6",
        "COM7",
        "COM8",
        "COM9",
        "COM\u{00b9}",
        "COM\u{00b2}",
        "COM\u{00b3}",
        "LPT1",
        "LPT2",
        "LPT3",
        "LPT4",
        "LPT5",
        "LPT6",
        "LPT7",
        "LPT8",
        "LPT9",
        "LPT\u{00b9}",
        "LPT\u{00b2}",
        "LPT\u{00b3}",
    ];

    let before_first_dot = match component.find('.') {
        Some(dot) => &component[..dot],
        None => component,
    };
    // The stem cannot contain a dot any more, so only spaces are left to trim.
    let stem = before_first_dot.trim_end_matches(' ');
    DEVICE_NAMES
        .iter()
        .any(|name| stem.eq_ignore_ascii_case(name))
}
1874
1875fn validate_encrypted_extent(
1876 structure: &'static str,
1877 data_block_count: u32,
1878 encrypted_size: u32,
1879 block_size: u32,
1880) -> Result<(), FormatError> {
1881 if data_block_count == 0 || encrypted_size == 0 {
1882 return invalid(structure, "encrypted object has zero data blocks or size");
1883 }
1884 let expected = (data_block_count as u64)
1885 .checked_mul(block_size as u64)
1886 .ok_or(FormatError::MetadataArithmeticOverflow { structure })?;
1887 if expected > u32::MAX as u64 || expected != encrypted_size as u64 {
1888 return invalid(
1889 structure,
1890 "encrypted_size is not data_block_count * block_size",
1891 );
1892 }
1893 Ok(())
1894}
1895
1896fn validate_fec_class_extent(
1897 structure: &'static str,
1898 data_block_count: u32,
1899 parity_block_count: u32,
1900 data_shard_max: u16,
1901 parity_shard_max: u16,
1902) -> Result<(), FormatError> {
1903 if data_block_count > data_shard_max as u32 {
1904 return invalid(structure, "data_block_count exceeds class maximum");
1905 }
1906 if parity_block_count > parity_shard_max as u32 {
1907 return invalid(structure, "parity_block_count exceeds class maximum");
1908 }
1909 let total = data_block_count as u64 + parity_block_count as u64;
1910 if total > REED_SOLOMON_GF16_MAX_TOTAL_SHARDS {
1911 return invalid(
1912 structure,
1913 "data_block_count + parity_block_count exceeds ReedSolomonGF16 limit",
1914 );
1915 }
1916 Ok(())
1917}
1918
1919fn frame_for_file<'a>(
1920 _file: &FileEntry,
1921 frame_by_index: &HashMap<u64, usize>,
1922 frames: &'a [FrameEntry],
1923 frame_index: u64,
1924) -> Result<&'a FrameEntry, FormatError> {
1925 frame_by_index
1926 .get(&frame_index)
1927 .and_then(|idx| frames.get(*idx))
1928 .ok_or(FormatError::InvalidMetadata {
1929 structure: "FileEntry",
1930 reason: "referenced FrameEntry is missing",
1931 })
1932}
1933
1934fn parse_counted_table<T>(
1935 bytes: &[u8],
1936 structure: &'static str,
1937 name: &'static str,
1938 count: u64,
1939 offset: u64,
1940 entry_len: usize,
1941 cursor: &mut usize,
1942 parse: fn(&[u8]) -> Result<T, FormatError>,
1943) -> Result<Vec<T>, FormatError> {
1944 if count == 0 {
1945 if offset != 0 {
1946 return invalid(structure, "absent counted table has non-zero offset");
1947 }
1948 return Ok(Vec::new());
1949 }
1950 expect_offset(structure, name, offset, *cursor)?;
1951 let count = to_usize(count, structure)?;
1952 let bytes_len = checked_mul(count, entry_len, structure)?;
1953 let table = slice(bytes, *cursor, bytes_len, structure)?;
1954 *cursor = checked_add(*cursor, bytes_len, structure)?;
1955 table.chunks_exact(entry_len).map(parse).collect()
1956}
1957
1958fn parse_u32_array(bytes: &[u8], structure: &'static str) -> Result<Vec<u32>, FormatError> {
1959 let mut out = Vec::with_capacity(bytes.len() / 4);
1960 for chunk in bytes.chunks_exact(4) {
1961 out.push(read_u32(chunk, 0, structure)?);
1962 }
1963 Ok(out)
1964}
1965
1966fn string_slice<'a>(
1967 string_pool: &'a [u8],
1968 offset: u64,
1969 length: u64,
1970 structure: &'static str,
1971) -> Result<&'a [u8], FormatError> {
1972 let start = to_usize(offset, structure)?;
1973 let len = to_usize(length, structure)?;
1974 slice(string_pool, start, len, structure)
1975}
1976
1977fn shard_entry_sort_key(entry: &ShardEntry) -> ([u8; 8], [u8; 8], u64) {
1978 (
1979 entry.first_path_hash,
1980 entry.last_path_hash,
1981 entry.shard_index,
1982 )
1983}
1984
1985fn directory_hint_shard_sort_key(entry: &DirectoryHintShardEntry) -> ([u8; 8], [u8; 8], u64) {
1986 (
1987 entry.first_dir_hash,
1988 entry.last_dir_hash,
1989 entry.hint_shard_index,
1990 )
1991}
1992
/// Returns the offset to record for a table of `len` entries located at `cursor`.
///
/// An empty table is recorded with offset zero; otherwise the cursor is used.
///
/// NOTE(review): the `as u32` cast keeps only the low 32 bits of `cursor`, so a
/// cursor above `u32::MAX` is truncated without any error — confirm callers
/// never serialize buffers that large.
fn table_offset(len: usize, cursor: usize) -> u32 {
    match len {
        0 => 0,
        _ => cursor as u32,
    }
}
2000
2001fn expect_magic(
2002 structure: &'static str,
2003 expected: [u8; 4],
2004 actual: [u8; 4],
2005) -> Result<(), FormatError> {
2006 if actual != expected {
2007 return Err(FormatError::BadMagic { structure });
2008 }
2009 Ok(())
2010}
2011
2012fn expect_zero(structure: &'static str, bytes: &[u8]) -> Result<(), FormatError> {
2013 if bytes.iter().any(|byte| *byte != 0) {
2014 return Err(FormatError::NonZeroReserved { structure });
2015 }
2016 Ok(())
2017}
2018
2019fn expect_offset(
2020 structure: &'static str,
2021 name: &'static str,
2022 actual: u64,
2023 expected: usize,
2024) -> Result<(), FormatError> {
2025 if actual != expected as u64 {
2026 return Err(FormatError::InvalidMetadata {
2027 structure,
2028 reason: name,
2029 });
2030 }
2031 Ok(())
2032}
2033
2034fn slice<'a>(
2035 bytes: &'a [u8],
2036 offset: usize,
2037 len: usize,
2038 structure: &'static str,
2039) -> Result<&'a [u8], FormatError> {
2040 let end = checked_add(offset, len, structure)?;
2041 bytes.get(offset..end).ok_or(FormatError::InvalidMetadata {
2042 structure,
2043 reason: "range is out of bounds",
2044 })
2045}
2046
2047fn read_array<const N: usize>(
2048 bytes: &[u8],
2049 offset: usize,
2050 structure: &'static str,
2051) -> Result<[u8; N], FormatError> {
2052 let mut out = [0u8; N];
2053 out.copy_from_slice(slice(bytes, offset, N, structure)?);
2054 Ok(out)
2055}
2056
2057fn read_u32(bytes: &[u8], offset: usize, structure: &'static str) -> Result<u32, FormatError> {
2058 let raw = read_array::<4>(bytes, offset, structure)?;
2059 Ok(u32::from_le_bytes(raw))
2060}
2061
2062fn read_u64(bytes: &[u8], offset: usize, structure: &'static str) -> Result<u64, FormatError> {
2063 let raw = read_array::<8>(bytes, offset, structure)?;
2064 Ok(u64::from_le_bytes(raw))
2065}
2066
/// Overwrites four bytes of `bytes` at `offset` with `value` in little-endian order.
///
/// # Panics
///
/// Panics when `offset..offset + 4` is not a valid range of `bytes`.
fn write_u32(bytes: &mut [u8], offset: usize, value: u32) {
    let encoded = value.to_le_bytes();
    let end = offset + 4;
    bytes[offset..end].copy_from_slice(&encoded);
}
2070
/// Overwrites eight bytes of `bytes` at `offset` with `value` in little-endian order.
///
/// # Panics
///
/// Panics when `offset..offset + 8` is not a valid range of `bytes`.
fn write_u64(bytes: &mut [u8], offset: usize, value: u64) {
    let encoded = value.to_le_bytes();
    let end = offset + 8;
    bytes[offset..end].copy_from_slice(&encoded);
}
2074
2075fn checked_add(lhs: usize, rhs: usize, structure: &'static str) -> Result<usize, FormatError> {
2076 lhs.checked_add(rhs)
2077 .ok_or(FormatError::MetadataArithmeticOverflow { structure })
2078}
2079
2080fn checked_mul(lhs: usize, rhs: usize, structure: &'static str) -> Result<usize, FormatError> {
2081 lhs.checked_mul(rhs)
2082 .ok_or(FormatError::MetadataArithmeticOverflow { structure })
2083}
2084
2085fn to_usize(value: u64, structure: &'static str) -> Result<usize, FormatError> {
2086 usize::try_from(value).map_err(|_| FormatError::MetadataArithmeticOverflow { structure })
2087}
2088
2089fn invalid<T>(structure: &'static str, reason: &'static str) -> Result<T, FormatError> {
2090 Err(FormatError::InvalidMetadata { structure, reason })
2091}
2092
2093#[cfg(test)]
2094mod tests {
2095 use super::*;
2096
/// Pins the default reader resource caps: four count caps of 1,000,000 and a
/// hash-collision shard scan cap of 16.
#[test]
fn default_reader_caps_match_v36() {
    let MetadataLimits {
        max_shard_count,
        max_directory_hint_shards,
        max_files_per_index_shard,
        max_entries_per_directory_hint_shard,
        max_hash_collision_shard_scan,
        ..
    } = MetadataLimits::default();

    assert_eq!(max_shard_count, 1_000_000);
    assert_eq!(max_directory_hint_shards, 1_000_000);
    assert_eq!(max_files_per_index_shard, 1_000_000);
    assert_eq!(max_entries_per_directory_hint_shard, 1_000_000);
    assert_eq!(max_hash_collision_shard_scan, 16);
}
2106
/// Checks that `IndexRoot::parse` rejects a shard row whose parity block count
/// is above the configured `max_index_parity_shards` cap.
#[test]
fn index_root_rejects_shard_extent_above_crypto_header_class_limits() {
    let path_hash = hash_prefix(b"a.txt");
    let root = IndexRoot {
        header: IndexRootHeader {
            file_count: 1,
            ..IndexRootHeader::empty()
        },
        shards: vec![ShardEntry {
            shard_index: 0,
            first_block_index: 1,
            data_block_count: 1,
            // Two parity blocks: one more than the cap configured below.
            parity_block_count: 2,
            encrypted_size: 4096,
            decompressed_size: 64,
            file_count: 1,
            first_path_hash: path_hash,
            last_path_hash: path_hash,
        }],
        directory_hint_shards: Vec::new(),
    };
    let mut limits = MetadataLimits::default();
    limits.max_index_parity_shards = 1;

    assert_eq!(
        IndexRoot::parse(&root.to_bytes(), false, limits).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "ShardEntry",
            reason: "parity_block_count exceeds class maximum",
        }
    );
}
2139
/// Checks that 65,535 data blocks plus one parity block is rejected for
/// exceeding the ReedSolomonGF16 total, even with both class caps at `u16::MAX`.
#[test]
fn metadata_fec_extent_rejects_reed_solomon_total_overflow() {
    let outcome = validate_fec_class_extent("EnvelopeEntry", 65_535, 1, u16::MAX, u16::MAX);
    let rejection = outcome.unwrap_err();

    assert_eq!(
        rejection,
        FormatError::InvalidMetadata {
            structure: "EnvelopeEntry",
            reason: "data_block_count + parity_block_count exceeds ReedSolomonGF16 limit",
        }
    );
}
2150
/// Round-trips an `IndexRoot` with an empty header and no rows through
/// `to_bytes` and `parse`, expecting an empty result.
#[test]
fn parses_valid_empty_index_root() {
    let encoded = IndexRoot {
        header: IndexRootHeader::empty(),
        shards: vec![],
        directory_hint_shards: vec![],
    }
    .to_bytes();

    let decoded = IndexRoot::parse(&encoded, false, MetadataLimits::default()).unwrap();

    assert_eq!(decoded.header.file_count, 0);
    assert!(decoded.shards.is_empty());
    assert!(decoded.directory_hint_shards.is_empty());
}
2166
/// Checks that `IndexRoot::parse` rejects a non-zero table offset when the
/// corresponding table has no rows, for both the shard table and the
/// directory hint shard table.
#[test]
fn index_root_rejects_nonzero_offsets_for_absent_counted_tables() {
    let mut root = IndexRoot {
        header: IndexRootHeader::empty(),
        shards: Vec::new(),
        directory_hint_shards: Vec::new(),
    };

    // Case 1: no shard rows. Byte offset 88 is presumably the shard table
    // offset field (the expected error below suggests so) — verify against
    // the IndexRoot layout.
    let mut bytes = root.to_bytes();
    write_u64(&mut bytes, 88, INDEX_ROOT_LEN as u64);
    assert_eq!(
        IndexRoot::parse(&bytes, false, MetadataLimits::default()).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "IndexRoot",
            reason: "absent shard table has non-zero offset",
        }
    );

    // Case 2: one shard row but no directory hint rows. Byte offset 96 is
    // presumably the directory hint shard table offset field.
    root.header.file_count = 1;
    root.shards.push(ShardEntry {
        shard_index: 0,
        first_block_index: 1,
        data_block_count: 1,
        parity_block_count: 0,
        encrypted_size: 4096,
        decompressed_size: 128,
        file_count: 1,
        first_path_hash: hash_prefix(b"a.txt"),
        last_path_hash: hash_prefix(b"a.txt"),
    });
    let mut bytes = root.to_bytes();
    write_u64(&mut bytes, 96, (INDEX_ROOT_LEN + SHARD_ENTRY_LEN) as u64);
    assert_eq!(
        IndexRoot::parse(&bytes, false, MetadataLimits::default()).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "IndexRoot",
            reason: "absent directory hint shard table has non-zero offset",
        }
    );
}
2207
/// Checks that parsing with `has_dictionary = true` fails when the header
/// comes from `IndexRootHeader::empty()` with no dictionary fields set.
#[test]
fn index_root_rejects_has_dictionary_with_zero_dictionary_fields() {
    let encoded = IndexRoot {
        header: IndexRootHeader::empty(),
        shards: vec![],
        directory_hint_shards: vec![],
    }
    .to_bytes();

    let rejection = IndexRoot::parse(&encoded, true, MetadataLimits::default()).unwrap_err();

    assert_eq!(
        rejection,
        FormatError::InvalidMetadata {
            structure: "IndexRoot",
            reason: "dictionary data block count is zero while has_dictionary is true",
        }
    );
}
2224
/// Checks that a root with no shards but a populated dictionary extent is
/// rejected when parsed with `has_dictionary = true`.
#[test]
fn index_root_rejects_empty_archive_with_dictionary_extent() {
    let header = IndexRootHeader {
        dictionary_first_block: 1,
        dictionary_data_block_count: 1,
        dictionary_encrypted_size: 4096,
        dictionary_decompressed_size: 16,
        ..IndexRootHeader::empty()
    };
    let encoded = IndexRoot {
        header,
        shards: vec![],
        directory_hint_shards: vec![],
    }
    .to_bytes();

    let rejection = IndexRoot::parse(&encoded, true, MetadataLimits::default()).unwrap_err();

    assert_eq!(
        rejection,
        FormatError::InvalidMetadata {
            structure: "IndexRoot",
            reason: "empty archive cannot use dictionary",
        }
    );
}
2247
/// Checks that a zero data block count or a zero encrypted size is rejected
/// for each kind of encrypted extent exercised here: direct
/// `validate_encrypted_extent` calls, shard rows, directory hint shard rows,
/// and the dictionary fields of the root header.
#[test]
fn encrypted_object_extents_reject_zero_data_or_size_for_all_metadata_rows() {
    // Direct calls: the second argument is zero in the first case and the
    // third argument is zero in the second.
    assert_eq!(
        validate_encrypted_extent("ManifestFooter.IndexRoot", 0, 4096, 4096).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "ManifestFooter.IndexRoot",
            reason: "encrypted object has zero data blocks or size",
        }
    );
    assert_eq!(
        validate_encrypted_extent("EnvelopeEntry", 1, 0, 4096).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "EnvelopeEntry",
            reason: "encrypted object has zero data blocks or size",
        }
    );

    // Shard row with zero data blocks.
    let path_hash = hash_prefix(b"a.txt");
    let mut root = IndexRoot {
        header: IndexRootHeader {
            file_count: 1,
            ..IndexRootHeader::empty()
        },
        shards: vec![ShardEntry {
            shard_index: 0,
            first_block_index: 1,
            data_block_count: 0,
            parity_block_count: 0,
            encrypted_size: 4096,
            decompressed_size: 128,
            file_count: 1,
            first_path_hash: path_hash,
            last_path_hash: path_hash,
        }],
        directory_hint_shards: Vec::new(),
    };
    assert_eq!(
        IndexRoot::parse(&root.to_bytes(), false, MetadataLimits::default()).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "ShardEntry",
            reason: "encrypted object has zero data blocks or size",
        }
    );

    // Repair the shard row, then add a directory hint shard row whose
    // encrypted size is zero.
    root.shards[0].data_block_count = 1;
    root.shards[0].encrypted_size = 4096;
    root.directory_hint_shards.push(DirectoryHintShardEntry {
        hint_shard_index: 0,
        first_dir_hash: hash_prefix(b""),
        last_dir_hash: hash_prefix(b""),
        first_block_index: 2,
        data_block_count: 1,
        parity_block_count: 0,
        encrypted_size: 0,
        decompressed_size: 72,
        entry_count: 1,
    });
    assert_eq!(
        IndexRoot::parse(&root.to_bytes(), false, MetadataLimits::default()).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "DirectoryHintShardEntry",
            reason: "encrypted object has zero data blocks or size",
        }
    );

    // Dictionary extent in the header: first a zero data block count, then a
    // zero encrypted size. Each yields its own dictionary-specific reason.
    let mut dict_root = IndexRoot {
        header: IndexRootHeader {
            file_count: 1,
            dictionary_first_block: 10,
            dictionary_data_block_count: 0,
            dictionary_parity_block_count: 0,
            dictionary_encrypted_size: 4096,
            dictionary_decompressed_size: 32,
            ..IndexRootHeader::empty()
        },
        shards: vec![ShardEntry {
            shard_index: 0,
            first_block_index: 1,
            data_block_count: 1,
            parity_block_count: 0,
            encrypted_size: 4096,
            decompressed_size: 128,
            file_count: 1,
            first_path_hash: path_hash,
            last_path_hash: path_hash,
        }],
        directory_hint_shards: Vec::new(),
    };
    assert_eq!(
        IndexRoot::parse(&dict_root.to_bytes(), true, MetadataLimits::default()).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "IndexRoot",
            reason: "dictionary data block count is zero while has_dictionary is true",
        }
    );
    dict_root.header.dictionary_data_block_count = 1;
    dict_root.header.dictionary_encrypted_size = 0;
    assert_eq!(
        IndexRoot::parse(&dict_root.to_bytes(), true, MetadataLimits::default()).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "IndexRoot",
            reason: "required dictionary field is zero",
        }
    );
}
2353
/// Checks that populated dictionary fields are rejected when the root is
/// parsed with `has_dictionary = false`.
#[test]
fn index_root_rejects_dictionary_fields_when_crypto_header_has_no_dictionary() {
    let root = IndexRoot {
        header: IndexRootHeader {
            dictionary_first_block: 1,
            dictionary_data_block_count: 1,
            dictionary_encrypted_size: 4096,
            dictionary_decompressed_size: 16,
            ..IndexRootHeader::empty()
        },
        shards: Vec::new(),
        directory_hint_shards: Vec::new(),
    };

    let rejection =
        IndexRoot::parse(&root.to_bytes(), false, MetadataLimits::default()).unwrap_err();

    assert_eq!(
        rejection,
        FormatError::InvalidMetadata {
            structure: "IndexRoot",
            reason: "dictionary fields are non-zero while has_dictionary is false",
        }
    );
}
2374
/// Checks that two directory hint shard rows sharing `first_dir_hash` are
/// rejected as unsorted when the second row has the smaller `last_dir_hash`,
/// even though their `hint_shard_index` values ascend.
///
/// NOTE(review): presumably the parser orders rows by
/// `(first_dir_hash, last_dir_hash, hint_shard_index)` — confirm against
/// `IndexRoot::parse`.
#[test]
fn rejects_directory_hint_rows_sorted_by_old_v36_key_only() {
    // `h` compares lower than `z`.
    let h = [0x10; 8];
    let z = [0x20; 8];
    let root = IndexRoot {
        header: IndexRootHeader {
            file_count: 1,
            ..IndexRootHeader::empty()
        },
        shards: vec![ShardEntry {
            shard_index: 0,
            first_block_index: 0,
            data_block_count: 1,
            parity_block_count: 1,
            encrypted_size: 4096,
            decompressed_size: 64,
            file_count: 1,
            first_path_hash: h,
            last_path_hash: z,
        }],
        directory_hint_shards: vec![
            // Row 0 spans h..z.
            DirectoryHintShardEntry {
                hint_shard_index: 0,
                first_dir_hash: h,
                last_dir_hash: z,
                first_block_index: 10,
                data_block_count: 1,
                parity_block_count: 1,
                encrypted_size: 4096,
                decompressed_size: 72,
                entry_count: 1,
            },
            // Row 1 spans h..h: same first hash, smaller last hash.
            DirectoryHintShardEntry {
                hint_shard_index: 1,
                first_dir_hash: h,
                last_dir_hash: h,
                first_block_index: 12,
                data_block_count: 1,
                parity_block_count: 1,
                encrypted_size: 4096,
                decompressed_size: 72,
                entry_count: 1,
            },
        ],
    };

    assert_eq!(
        IndexRoot::parse(&root.to_bytes(), false, MetadataLimits::default()).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "IndexRoot",
            reason: "DirectoryHintShardEntry rows are not sorted"
        }
    );
}
2429
/// Checks that `max_directory_hint_shards` is enforced separately from
/// `max_shard_count`: a root with one shard row and one hint row parses with
/// both caps at 1, and fails once only the hint cap is lowered to 0.
#[test]
fn directory_hint_shard_count_cap_is_independent_from_index_shard_cap() {
    let path_hash = hash_prefix(b"a.txt");
    let dir_hash = hash_prefix(b"");
    let root = IndexRoot {
        header: IndexRootHeader {
            file_count: 1,
            ..IndexRootHeader::empty()
        },
        shards: vec![ShardEntry {
            shard_index: 0,
            first_block_index: 1,
            data_block_count: 1,
            parity_block_count: 0,
            encrypted_size: 4096,
            decompressed_size: 128,
            file_count: 1,
            first_path_hash: path_hash,
            last_path_hash: path_hash,
        }],
        directory_hint_shards: vec![DirectoryHintShardEntry {
            hint_shard_index: 0,
            first_dir_hash: dir_hash,
            last_dir_hash: dir_hash,
            first_block_index: 2,
            data_block_count: 1,
            parity_block_count: 0,
            encrypted_size: 4096,
            decompressed_size: 72,
            entry_count: 1,
        }],
    };
    // Both caps at exactly the row counts: parsing must succeed.
    let mut limits = MetadataLimits::default();
    limits.max_shard_count = 1;
    limits.max_directory_hint_shards = 1;
    IndexRoot::parse(&root.to_bytes(), false, limits).unwrap();

    // Lower only the hint cap; the shard cap stays at 1.
    limits.max_directory_hint_shards = 0;
    assert_eq!(
        IndexRoot::parse(&root.to_bytes(), false, limits).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "IndexRoot",
            reason: "directory hint shard count exceeds resource cap",
        }
    );
}
2476
/// Checks that `DirectoryHintTable::parse` returns
/// `FormatError::UnsafeArchivePath` for a seven-byte directory path when
/// `max_path_length` is set to 3.
#[test]
fn directory_hint_paths_obey_configured_max_path_length() {
    let path = b"toolong".to_vec();
    let table = DirectoryHintTable {
        header: DirectoryHintTableHeader {
            version: 1,
            hint_shard_index: 0,
            entry_count: 0,
            entry_table_offset: 0,
            shard_list_offset: 0,
            string_pool_offset: 0,
            string_pool_size: 0,
        },
        entries: vec![DirectoryHintEntry {
            dir_hash: hash_prefix(&path),
            path_offset: 0,
            path_length: path.len() as u32,
            shard_list_start_index: 0,
            shard_count: 1,
        }],
        shard_row_indexes: vec![0],
        string_pool: path.clone(),
        entry_paths: vec![path.clone()],
    };
    let bytes = table.to_bytes();
    // Row that locates this table; its decompressed size is the serialized length.
    let locating = DirectoryHintShardEntry {
        hint_shard_index: 0,
        first_dir_hash: hash_prefix(&path),
        last_dir_hash: hash_prefix(&path),
        first_block_index: 0,
        data_block_count: 1,
        parity_block_count: 0,
        encrypted_size: 4096,
        decompressed_size: bytes.len() as u32,
        entry_count: 1,
    };
    let mut limits = MetadataLimits::default();
    limits.max_path_length = 3;

    assert_eq!(
        DirectoryHintTable::parse(&bytes, &locating, 1, limits).unwrap_err(),
        FormatError::UnsafeArchivePath
    );
}
2521
/// Checks that `DirectoryHintTable::parse` rejects a table whose header
/// `hint_shard_index` (5) differs from the locating row's index (6).
#[test]
fn directory_hint_table_rejects_wrong_hint_shard_identity() {
    let path = b"dir".to_vec();
    let table = DirectoryHintTable {
        header: DirectoryHintTableHeader {
            version: 1,
            hint_shard_index: 5,
            entry_count: 0,
            entry_table_offset: 0,
            shard_list_offset: 0,
            string_pool_offset: 0,
            string_pool_size: 0,
        },
        entries: vec![DirectoryHintEntry {
            dir_hash: hash_prefix(&path),
            path_offset: 0,
            path_length: path.len() as u32,
            shard_list_start_index: 0,
            shard_count: 1,
        }],
        shard_row_indexes: vec![0],
        string_pool: path.clone(),
        entry_paths: vec![path.clone()],
    };
    let bytes = table.to_bytes();
    let locating = DirectoryHintShardEntry {
        // Deliberately different from the table header's index above.
        hint_shard_index: 6,
        first_dir_hash: hash_prefix(&path),
        last_dir_hash: hash_prefix(&path),
        first_block_index: 0,
        data_block_count: 1,
        parity_block_count: 0,
        encrypted_size: 4096,
        decompressed_size: bytes.len() as u32,
        entry_count: 1,
    };

    assert_eq!(
        DirectoryHintTable::parse(&bytes, &locating, 1, MetadataLimits::default()).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "DirectoryHintTable",
            reason: "hint shard index does not match locating DirectoryHintShardEntry",
        }
    );
}
2567
/// Checks that `DirectoryHintTable::parse` rejects a directory hint entry
/// whose `shard_count` is zero.
#[test]
fn directory_hint_table_rejects_empty_shard_lists() {
    let path = b"dir".to_vec();
    let table = DirectoryHintTable {
        header: DirectoryHintTableHeader {
            version: 1,
            hint_shard_index: 0,
            entry_count: 0,
            entry_table_offset: 0,
            shard_list_offset: 0,
            string_pool_offset: 0,
            string_pool_size: 0,
        },
        entries: vec![DirectoryHintEntry {
            dir_hash: hash_prefix(&path),
            path_offset: 0,
            path_length: path.len() as u32,
            shard_list_start_index: 0,
            // The defect under test: the entry lists no shards.
            shard_count: 0,
        }],
        shard_row_indexes: Vec::new(),
        string_pool: path.clone(),
        entry_paths: vec![path.clone()],
    };
    let bytes = table.to_bytes();
    let locating = DirectoryHintShardEntry {
        hint_shard_index: 0,
        first_dir_hash: hash_prefix(&path),
        last_dir_hash: hash_prefix(&path),
        first_block_index: 0,
        data_block_count: 1,
        parity_block_count: 0,
        encrypted_size: 4096,
        decompressed_size: bytes.len() as u32,
        entry_count: 1,
    };

    assert_eq!(
        DirectoryHintTable::parse(&bytes, &locating, 1, MetadataLimits::default()).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "DirectoryHintEntry",
            reason: "shard count is zero",
        }
    );
}
2613
/// Checks three `IndexShard::parse` rejections on a one-file, one-frame,
/// one-envelope shard: a version field patched to 2, and two cases where a
/// table count is patched to zero while its offset is patched to a non-zero
/// value.
#[test]
fn index_shard_rejects_unsupported_version_and_zero_count_pointer_offsets() {
    let path = b"file.txt";
    let path_hash = hash_prefix(path);
    let file = FileEntry {
        path_hash,
        path_offset: 0,
        path_length: path.len() as u32,
        first_frame_index: 0,
        frame_count: 1,
        offset_in_first_frame_plaintext: 0,
        tar_member_group_size: 512,
        file_data_size: 0,
        flags: 0,
    };
    let frame = FrameEntry {
        frame_index: 0,
        envelope_index: 0,
        offset_in_envelope: 0,
        compressed_size: 128,
        decompressed_size: 512,
        flags: 0,
        tar_stream_offset: 0,
    };
    let envelope = EnvelopeEntry {
        envelope_index: 0,
        first_block_index: 0,
        data_block_count: 1,
        parity_block_count: 0,
        encrypted_size: 4096,
        plaintext_size: 128,
        first_frame_index: 0,
        frame_count: 1,
    };
    let shard = IndexShard {
        header: IndexShardHeader {
            version: 1,
            shard_index: 7,
            file_count: 0,
            frame_count: 0,
            envelope_count: 0,
            file_table_offset: 0,
            frame_table_offset: 0,
            envelope_table_offset: 0,
            string_pool_offset: 0,
            string_pool_size: 0,
        },
        files: vec![file],
        frames: vec![frame],
        envelopes: vec![envelope],
        string_pool: path.to_vec(),
        file_paths: Vec::new(),
        file_tar_member_group_starts: Vec::new(),
    };
    // Row that locates the shard; its decompressed size is the serialized length.
    let locating = ShardEntry {
        shard_index: 7,
        first_block_index: 10,
        data_block_count: 1,
        parity_block_count: 0,
        encrypted_size: 4096,
        decompressed_size: shard.to_bytes().len() as u32,
        file_count: 1,
        first_path_hash: path_hash,
        last_path_hash: path_hash,
    };

    // Patch the u32 at byte offset 4 to 2; the parser reports an unsupported version.
    let mut unsupported_version = shard.to_bytes();
    write_u32(&mut unsupported_version, 4, 2);
    assert_eq!(
        IndexShard::parse(&unsupported_version, &locating, MetadataLimits::default())
            .unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "IndexShard",
            reason: "unsupported version",
        }
    );

    // Presumably offset 20 is the frame count and offset 32 the frame table
    // offset — verify against the IndexShard header layout.
    let mut nonzero_zero_frame_table = shard.to_bytes();
    write_u32(&mut nonzero_zero_frame_table, 20, 0);
    write_u32(
        &mut nonzero_zero_frame_table,
        32,
        INDEX_SHARD_HEADER_LEN as u32,
    );
    assert_eq!(
        IndexShard::parse(
            &nonzero_zero_frame_table,
            &locating,
            MetadataLimits::default()
        )
        .unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "IndexShard",
            reason: "absent counted table has non-zero offset",
        }
    );

    // Presumably offset 24 is the envelope count and offset 36 the envelope
    // table offset — verify against the IndexShard header layout.
    let mut nonzero_zero_envelope_table = shard.to_bytes();
    write_u32(&mut nonzero_zero_envelope_table, 24, 0);
    write_u32(
        &mut nonzero_zero_envelope_table,
        36,
        (INDEX_SHARD_HEADER_LEN + FILE_ENTRY_LEN + FRAME_ENTRY_LEN) as u32,
    );
    assert_eq!(
        IndexShard::parse(
            &nonzero_zero_envelope_table,
            &locating,
            MetadataLimits::default()
        )
        .unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "IndexShard",
            reason: "absent counted table has non-zero offset",
        }
    );
}
2731
/// Checks that `DirectoryHintTable::parse` rejects a table whose header has
/// been patched so that the string pool is reported as absent while its
/// offset is non-zero.
#[test]
fn directory_hint_table_rejects_zero_count_nonzero_offsets() {
    let path = b"dir".to_vec();
    let table = DirectoryHintTable {
        header: DirectoryHintTableHeader {
            version: 1,
            hint_shard_index: 5,
            entry_count: 0,
            entry_table_offset: 0,
            shard_list_offset: 0,
            string_pool_offset: 0,
            string_pool_size: 0,
        },
        entries: vec![DirectoryHintEntry {
            dir_hash: hash_prefix(&path),
            path_offset: 0,
            path_length: path.len() as u32,
            shard_list_start_index: 0,
            shard_count: 1,
        }],
        shard_row_indexes: vec![0],
        string_pool: path.clone(),
        entry_paths: vec![path.clone()],
    };
    let locating = DirectoryHintShardEntry {
        hint_shard_index: 5,
        first_dir_hash: hash_prefix(&path),
        last_dir_hash: hash_prefix(&path),
        first_block_index: 0,
        data_block_count: 1,
        parity_block_count: 0,
        encrypted_size: 4096,
        decompressed_size: table.to_bytes().len() as u32,
        entry_count: 1,
    };
    let mut bytes = table.to_bytes();
    let bytes_len = bytes.len() as u64;
    // Presumably offset 48 is the string pool size and offset 40 the string
    // pool offset — verify against the DirectoryHintTable header layout.
    write_u64(&mut bytes, 48, 0);
    write_u64(&mut bytes, 40, bytes_len);

    assert_eq!(
        DirectoryHintTable::parse(&bytes, &locating, 1, MetadataLimits::default()).unwrap_err(),
        FormatError::InvalidMetadata {
            structure: "DirectoryHintTable",
            reason: "absent string pool has non-zero offset",
        }
    );
}
2780
/// Checks that `IndexShard::parse` accepts a baseline shard and then rejects
/// six mutations of its frame and envelope tables: a missing, an
/// unreferenced, and a duplicated row for each table.
#[test]
fn index_shard_rejects_non_exact_local_frame_and_envelope_tables() {
    let path = b"exact-local.txt";
    let path_hash = hash_prefix(path);
    let file = FileEntry {
        path_hash,
        path_offset: 0,
        path_length: path.len() as u32,
        first_frame_index: 0,
        frame_count: 1,
        offset_in_first_frame_plaintext: 0,
        tar_member_group_size: 512,
        file_data_size: 0,
        flags: 0,
    };
    let frame = FrameEntry {
        frame_index: 0,
        envelope_index: 0,
        offset_in_envelope: 0,
        compressed_size: 128,
        decompressed_size: 512,
        flags: 0,
        tar_stream_offset: 0,
    };
    let envelope = EnvelopeEntry {
        envelope_index: 0,
        first_block_index: 10,
        data_block_count: 1,
        parity_block_count: 0,
        encrypted_size: 4096,
        plaintext_size: 128,
        first_frame_index: 0,
        frame_count: 1,
    };
    let shard = IndexShard {
        header: IndexShardHeader {
            version: 1,
            shard_index: 3,
            file_count: 0,
            frame_count: 0,
            envelope_count: 0,
            file_table_offset: 0,
            frame_table_offset: 0,
            envelope_table_offset: 0,
            string_pool_offset: 0,
            string_pool_size: 0,
        },
        files: vec![file.clone()],
        frames: vec![frame.clone()],
        envelopes: vec![envelope.clone()],
        string_pool: path.to_vec(),
        file_paths: Vec::new(),
        file_tar_member_group_starts: Vec::new(),
    };
    let locating = ShardEntry {
        shard_index: 3,
        first_block_index: 20,
        data_block_count: 1,
        parity_block_count: 0,
        encrypted_size: 4096,
        decompressed_size: shard.to_bytes().len() as u32,
        file_count: 1,
        first_path_hash: path_hash,
        last_path_hash: path_hash,
    };
    // Baseline: the unmodified shard must parse.
    IndexShard::parse(&shard.to_bytes(), &locating, MetadataLimits::default()).unwrap();

    // Re-serializes the shard with replacement tables, matches the locating
    // row's decompressed size to the new length, and returns the parse error.
    let parse_with = |frames: Vec<FrameEntry>, envelopes: Vec<EnvelopeEntry>| {
        let mut mutated = shard.clone();
        mutated.frames = frames;
        mutated.envelopes = envelopes;
        let bytes = mutated.to_bytes();
        let locating = ShardEntry {
            decompressed_size: bytes.len() as u32,
            ..locating.clone()
        };
        IndexShard::parse(&bytes, &locating, MetadataLimits::default()).unwrap_err()
    };

    // The only frame row has index 1, but the file references frame 0.
    let mut missing_frame = frame.clone();
    missing_frame.frame_index = 1;
    assert_eq!(
        parse_with(vec![missing_frame], vec![envelope.clone()]),
        FormatError::InvalidMetadata {
            structure: "FileEntry",
            reason: "referenced FrameEntry is missing",
        }
    );

    // An extra frame row (index 9) that no file references.
    let mut unreferenced_frame = frame.clone();
    unreferenced_frame.frame_index = 9;
    unreferenced_frame.tar_stream_offset = 1024;
    assert_eq!(
        parse_with(
            vec![frame.clone(), unreferenced_frame],
            vec![envelope.clone()]
        ),
        FormatError::InvalidMetadata {
            structure: "IndexShard",
            reason: "FrameEntry table is not the exact set referenced by FileEntry rows",
        }
    );

    // The same frame row twice.
    assert_eq!(
        parse_with(vec![frame.clone(), frame.clone()], vec![envelope.clone()]),
        FormatError::InvalidMetadata {
            structure: "IndexShard",
            reason: "FrameEntry rows are not sorted and unique",
        }
    );

    // The only envelope row has index 1, but the frame references envelope 0.
    let mut missing_envelope = envelope.clone();
    missing_envelope.envelope_index = 1;
    assert_eq!(
        parse_with(vec![frame.clone()], vec![missing_envelope]),
        FormatError::InvalidMetadata {
            structure: "FrameEntry",
            reason: "referenced EnvelopeEntry is missing",
        }
    );

    // An extra envelope row (index 9) that no frame references.
    let mut unreferenced_envelope = envelope.clone();
    unreferenced_envelope.envelope_index = 9;
    unreferenced_envelope.first_block_index = 11;
    unreferenced_envelope.first_frame_index = 9;
    assert_eq!(
        parse_with(
            vec![frame.clone()],
            vec![envelope.clone(), unreferenced_envelope]
        ),
        FormatError::InvalidMetadata {
            structure: "IndexShard",
            reason: "EnvelopeEntry table is not the exact set referenced by FrameEntry rows",
        }
    );

    // The same envelope row twice.
    assert_eq!(
        parse_with(vec![frame], vec![envelope.clone(), envelope]),
        FormatError::InvalidMetadata {
            structure: "IndexShard",
            reason: "EnvelopeEntry rows are not sorted and unique",
        }
    );
}
2925
2926 #[test]
2927 fn metadata_parsers_reject_malformed_buffer_corpus() {
2928 let limits = MetadataLimits::default();
2929 let path = b"file.txt";
2930 let path_hash = hash_prefix(path);
2931 let shard_entry = ShardEntry {
2932 shard_index: 0,
2933 first_block_index: 1,
2934 data_block_count: 1,
2935 parity_block_count: 0,
2936 encrypted_size: 4096,
2937 decompressed_size: 0,
2938 file_count: 1,
2939 first_path_hash: path_hash,
2940 last_path_hash: path_hash,
2941 };
2942
2943 let root = IndexRoot {
2944 header: IndexRootHeader {
2945 file_count: 1,
2946 frame_count: 1,
2947 envelope_count: 1,
2948 payload_block_count: 1,
2949 tar_total_size: 512,
2950 ..IndexRootHeader::empty()
2951 },
2952 shards: vec![ShardEntry {
2953 decompressed_size: 256,
2954 ..shard_entry.clone()
2955 }],
2956 directory_hint_shards: Vec::new(),
2957 };
2958 let root_bytes = root.to_bytes();
2959 IndexRoot::parse(&root_bytes, false, limits).unwrap();
2960
2961 assert_eq!(
2962 IndexRoot::parse(&root_bytes[..INDEX_ROOT_LEN - 1], false, limits).unwrap_err(),
2963 FormatError::InvalidMetadata {
2964 structure: "IndexRoot",
2965 reason: "plaintext is shorter than fixed header",
2966 }
2967 );
2968 let mut bad_root = root_bytes.clone();
2969 bad_root[0] ^= 1;
2970 assert_eq!(
2971 IndexRoot::parse(&bad_root, false, limits).unwrap_err(),
2972 FormatError::BadMagic {
2973 structure: "IndexRoot"
2974 }
2975 );
2976 let mut bad_root = root_bytes.clone();
2977 write_u32(&mut bad_root, 4, 2);
2978 assert_eq!(
2979 IndexRoot::parse(&bad_root, false, limits).unwrap_err(),
2980 FormatError::InvalidMetadata {
2981 structure: "IndexRoot",
2982 reason: "unsupported version",
2983 }
2984 );
2985 let mut bad_root = root_bytes.clone();
2986 bad_root[128] = 1;
2987 assert_eq!(
2988 IndexRoot::parse(&bad_root, false, limits).unwrap_err(),
2989 FormatError::NonZeroReserved {
2990 structure: "IndexRoot"
2991 }
2992 );
2993 let mut bad_root = root_bytes.clone();
2994 write_u64(&mut bad_root, 88, (INDEX_ROOT_LEN + 1) as u64);
2995 assert_eq!(
2996 IndexRoot::parse(&bad_root, false, limits).unwrap_err(),
2997 FormatError::InvalidMetadata {
2998 structure: "IndexRoot",
2999 reason: "shard table",
3000 }
3001 );
3002 assert_eq!(
3003 IndexRoot::parse(&root_bytes[..root_bytes.len() - 1], false, limits).unwrap_err(),
3004 FormatError::InvalidMetadata {
3005 structure: "IndexRoot",
3006 reason: "range is out of bounds",
3007 }
3008 );
3009 let mut bad_root = root_bytes.clone();
3010 bad_root.push(0);
3011 assert_eq!(
3012 IndexRoot::parse(&bad_root, false, limits).unwrap_err(),
3013 FormatError::InvalidMetadata {
3014 structure: "IndexRoot",
3015 reason: "plaintext length does not match canonical cursor",
3016 }
3017 );
3018
3019 let file = FileEntry {
3020 path_hash,
3021 path_offset: 0,
3022 path_length: path.len() as u32,
3023 first_frame_index: 0,
3024 frame_count: 1,
3025 offset_in_first_frame_plaintext: 0,
3026 tar_member_group_size: 512,
3027 file_data_size: 0,
3028 flags: 0,
3029 };
3030 let frame = FrameEntry {
3031 frame_index: 0,
3032 envelope_index: 0,
3033 offset_in_envelope: 0,
3034 compressed_size: 128,
3035 decompressed_size: 512,
3036 flags: 0x0000_0003,
3037 tar_stream_offset: 0,
3038 };
3039 let envelope = EnvelopeEntry {
3040 envelope_index: 0,
3041 first_block_index: 1,
3042 data_block_count: 1,
3043 parity_block_count: 0,
3044 encrypted_size: 4096,
3045 plaintext_size: 128,
3046 first_frame_index: 0,
3047 frame_count: 1,
3048 };
3049 let shard = IndexShard {
3050 header: IndexShardHeader {
3051 version: 1,
3052 shard_index: 0,
3053 file_count: 0,
3054 frame_count: 0,
3055 envelope_count: 0,
3056 file_table_offset: 0,
3057 frame_table_offset: 0,
3058 envelope_table_offset: 0,
3059 string_pool_offset: 0,
3060 string_pool_size: 0,
3061 },
3062 files: vec![file],
3063 frames: vec![frame],
3064 envelopes: vec![envelope],
3065 string_pool: path.to_vec(),
3066 file_paths: Vec::new(),
3067 file_tar_member_group_starts: Vec::new(),
3068 };
3069 let shard_bytes = shard.to_bytes();
3070 let locating = ShardEntry {
3071 decompressed_size: shard_bytes.len() as u32,
3072 ..shard_entry
3073 };
3074 IndexShard::parse(&shard_bytes, &locating, limits).unwrap();
3075
3076 assert_eq!(
3077 IndexShard::parse(
3078 &shard_bytes[..INDEX_SHARD_HEADER_LEN - 1],
3079 &locating,
3080 limits
3081 )
3082 .unwrap_err(),
3083 FormatError::InvalidMetadata {
3084 structure: "IndexShard",
3085 reason: "plaintext is shorter than fixed header",
3086 }
3087 );
3088 let mut bad_shard = shard_bytes.clone();
3089 bad_shard[0] ^= 1;
3090 assert_eq!(
3091 IndexShard::parse(&bad_shard, &locating, limits).unwrap_err(),
3092 FormatError::BadMagic {
3093 structure: "IndexShard"
3094 }
3095 );
3096 let mut bad_shard = shard_bytes.clone();
3097 bad_shard[48] = 1;
3098 assert_eq!(
3099 IndexShard::parse(&bad_shard, &locating, limits).unwrap_err(),
3100 FormatError::NonZeroReserved {
3101 structure: "IndexShard"
3102 }
3103 );
3104 let mut bad_shard = shard_bytes.clone();
3105 write_u32(&mut bad_shard, 28, INDEX_SHARD_HEADER_LEN as u32 + 1);
3106 assert_eq!(
3107 IndexShard::parse(&bad_shard, &locating, limits).unwrap_err(),
3108 FormatError::InvalidMetadata {
3109 structure: "IndexShard",
3110 reason: "file table",
3111 }
3112 );
3113 assert_eq!(
3114 IndexShard::parse(&shard_bytes[..shard_bytes.len() - 1], &locating, limits)
3115 .unwrap_err(),
3116 FormatError::InvalidMetadata {
3117 structure: "IndexShard",
3118 reason: "range is out of bounds",
3119 }
3120 );
3121 let mut bad_shard = shard_bytes.clone();
3122 bad_shard.push(0);
3123 assert_eq!(
3124 IndexShard::parse(&bad_shard, &locating, limits).unwrap_err(),
3125 FormatError::InvalidMetadata {
3126 structure: "IndexShard",
3127 reason: "plaintext length does not match canonical cursor",
3128 }
3129 );
3130
3131 let dir_path = b"dir".to_vec();
3132 let dir_hash = hash_prefix(&dir_path);
3133 let table = DirectoryHintTable {
3134 header: DirectoryHintTableHeader {
3135 version: 1,
3136 hint_shard_index: 0,
3137 entry_count: 0,
3138 entry_table_offset: 0,
3139 shard_list_offset: 0,
3140 string_pool_offset: 0,
3141 string_pool_size: 0,
3142 },
3143 entries: vec![DirectoryHintEntry {
3144 dir_hash,
3145 path_offset: 0,
3146 path_length: dir_path.len() as u32,
3147 shard_list_start_index: 0,
3148 shard_count: 1,
3149 }],
3150 shard_row_indexes: vec![0],
3151 string_pool: dir_path.clone(),
3152 entry_paths: Vec::new(),
3153 };
3154 let table_bytes = table.to_bytes();
3155 let locating_hint = DirectoryHintShardEntry {
3156 hint_shard_index: 0,
3157 first_dir_hash: dir_hash,
3158 last_dir_hash: dir_hash,
3159 first_block_index: 2,
3160 data_block_count: 1,
3161 parity_block_count: 0,
3162 encrypted_size: 4096,
3163 decompressed_size: table_bytes.len() as u32,
3164 entry_count: 1,
3165 };
3166 DirectoryHintTable::parse(&table_bytes, &locating_hint, 1, limits).unwrap();
3167
3168 assert_eq!(
3169 DirectoryHintTable::parse(
3170 &table_bytes[..DIRECTORY_HINT_TABLE_LEN - 1],
3171 &locating_hint,
3172 1,
3173 limits,
3174 )
3175 .unwrap_err(),
3176 FormatError::InvalidMetadata {
3177 structure: "DirectoryHintTable",
3178 reason: "plaintext is shorter than fixed header",
3179 }
3180 );
3181 let mut bad_table = table_bytes.clone();
3182 bad_table[0] ^= 1;
3183 assert_eq!(
3184 DirectoryHintTable::parse(&bad_table, &locating_hint, 1, limits).unwrap_err(),
3185 FormatError::BadMagic {
3186 structure: "DirectoryHintTable"
3187 }
3188 );
3189 let mut bad_table = table_bytes.clone();
3190 bad_table[56] = 1;
3191 assert_eq!(
3192 DirectoryHintTable::parse(&bad_table, &locating_hint, 1, limits).unwrap_err(),
3193 FormatError::NonZeroReserved {
3194 structure: "DirectoryHintTable"
3195 }
3196 );
3197 let mut bad_table = table_bytes.clone();
3198 write_u64(&mut bad_table, 24, DIRECTORY_HINT_TABLE_LEN as u64 + 1);
3199 assert_eq!(
3200 DirectoryHintTable::parse(&bad_table, &locating_hint, 1, limits).unwrap_err(),
3201 FormatError::InvalidMetadata {
3202 structure: "DirectoryHintTable",
3203 reason: "entry table",
3204 }
3205 );
3206 assert_eq!(
3207 DirectoryHintTable::parse(
3208 &table_bytes[..table_bytes.len() - 1],
3209 &locating_hint,
3210 1,
3211 limits
3212 )
3213 .unwrap_err(),
3214 FormatError::InvalidMetadata {
3215 structure: "DirectoryHintTable",
3216 reason: "range is out of bounds",
3217 }
3218 );
3219 let mut bad_table = table_bytes.clone();
3220 bad_table.push(0);
3221 assert_eq!(
3222 DirectoryHintTable::parse(&bad_table, &locating_hint, 1, limits).unwrap_err(),
3223 FormatError::InvalidMetadata {
3224 structure: "DirectoryHintTable",
3225 reason: "plaintext length does not match canonical cursor",
3226 }
3227 );
3228 }
3229
/// Checks that `IndexRoot::candidate_shards_for_path` takes its hash-collision
/// scan cap from the `MetadataLimits` value handed in by the caller.
///
/// Three shard rows advertise the same first/last path hash. With a cap of 0
/// the lookup must fail with `HashPrefixCollisionRunExceeded`; with a cap of 2
/// it must return all three shard indexes.
#[test]
fn candidate_path_lookup_uses_supplied_collision_cap() {
    let path = b"same-prefix.txt";
    let shared_hash = hash_prefix(path);

    // Each row covers exactly `shared_hash`, so every row is a candidate for `path`.
    let colliding_row = |row| ShardEntry {
        shard_index: row,
        first_block_index: row,
        data_block_count: 1,
        parity_block_count: 1,
        encrypted_size: 4096,
        decompressed_size: 256,
        file_count: 1,
        first_path_hash: shared_hash,
        last_path_hash: shared_hash,
    };
    let root = IndexRoot {
        header: IndexRootHeader::empty(),
        shards: (0..3).map(colliding_row).collect(),
        directory_hint_shards: Vec::new(),
    };

    // Default limits with only the collision scan cap overridden.
    let limits_with_cap = |cap| MetadataLimits {
        max_hash_collision_shard_scan: cap,
        ..MetadataLimits::default()
    };

    let rejected = root.candidate_shards_for_path(path, limits_with_cap(0));
    assert_eq!(
        rejected.unwrap_err(),
        FormatError::HashPrefixCollisionRunExceeded
    );

    let candidates = root
        .candidate_shards_for_path(path, limits_with_cap(2))
        .unwrap();
    assert_eq!(candidates, vec![0, 1, 2]);
}
3265
/// Round-trips an `IndexShard` holding one file, one frame and one envelope
/// through `to_bytes` / `IndexShard::parse`, then checks that the only (and
/// therefore final) file entry is found by path and that its path is returned
/// unchanged.
#[test]
fn parses_single_shard_and_finds_final_file_entry() {
    let path = b"file.txt";
    let path_hash = hash_prefix(path);

    let shard = IndexShard {
        // Counts, offsets and pool size are written as zero here.
        // NOTE(review): presumably `to_bytes` derives them from the tables below — confirm.
        header: IndexShardHeader {
            version: 1,
            shard_index: 7,
            file_count: 0,
            frame_count: 0,
            envelope_count: 0,
            file_table_offset: 0,
            frame_table_offset: 0,
            envelope_table_offset: 0,
            string_pool_offset: 0,
            string_pool_size: 0,
        },
        files: vec![FileEntry {
            path_hash,
            path_offset: 0,
            path_length: path.len() as u32,
            first_frame_index: 0,
            frame_count: 1,
            offset_in_first_frame_plaintext: 0,
            tar_member_group_size: 512,
            file_data_size: 0,
            flags: 0,
        }],
        frames: vec![FrameEntry {
            frame_index: 0,
            envelope_index: 0,
            offset_in_envelope: 0,
            compressed_size: 128,
            decompressed_size: 512,
            flags: 0,
            tar_stream_offset: 0,
        }],
        envelopes: vec![EnvelopeEntry {
            envelope_index: 0,
            first_block_index: 0,
            data_block_count: 1,
            parity_block_count: 1,
            encrypted_size: 4096,
            plaintext_size: 128,
            first_frame_index: 0,
            frame_count: 1,
        }],
        // The string pool is exactly the one path, referenced at offset 0 by the file row.
        string_pool: path.to_vec(),
        file_paths: Vec::new(),
        file_tar_member_group_starts: Vec::new(),
    };

    // Serialize once; the same bytes size the locating row and feed the parser.
    let encoded = shard.to_bytes();
    let locating = ShardEntry {
        shard_index: 7,
        first_block_index: 10,
        data_block_count: 1,
        parity_block_count: 1,
        encrypted_size: 4096,
        decompressed_size: encoded.len() as u32,
        file_count: 1,
        first_path_hash: path_hash,
        last_path_hash: path_hash,
    };

    let parsed = IndexShard::parse(&encoded, &locating, MetadataLimits::default()).unwrap();

    assert_eq!(parsed.lookup_file_index(path), Some(0));
    assert_eq!(parsed.file_path(0), Some(path.as_slice()));
}
3338}