1use crate::checksum::jenkins_lookup3;
12use crate::chunk_index::ChunkEntry;
13use crate::error::{Error, Result};
14use crate::io::Cursor;
15use crate::storage::Storage;
16
/// Magic bytes expected at the start of a fixed array header.
const FAHD_SIGNATURE: [u8; 4] = [b'F', b'A', b'H', b'D'];
/// Magic bytes expected at the start of a fixed array data block.
const FADB_SIGNATURE: [u8; 4] = [b'F', b'A', b'D', b'B'];
19
/// Decoded fields of a fixed array header.
#[derive(Debug)]
struct FaHeader {
    /// Client identifier; a value of `1` selects the filtered entry layout
    /// (address, chunk size, filter mask).
    client_id: u8,
    /// Size in bytes of one entry in the data block.
    entry_size: u8,
    /// Base-2 logarithm of the number of entries held by one data block page.
    page_bits: u8,
    /// Total number of entries in the array.
    num_entries: u64,
    /// Address of the data block holding the entries.
    data_block_address: u64,
}
29
30fn parse_header(data: &[u8], address: u64, offset_size: u8, length_size: u8) -> Result<FaHeader> {
36 let mut cursor = Cursor::new(data);
37 cursor.set_position(address);
38
39 let sig = cursor.read_bytes(4)?;
40 if sig != FAHD_SIGNATURE {
41 return Err(Error::InvalidFixedArraySignature {
42 context: "header signature mismatch",
43 });
44 }
45
46 let version = cursor.read_u8()?;
47 if version != 0 {
48 return Err(Error::Other(format!(
49 "unsupported fixed array header version {}",
50 version
51 )));
52 }
53
54 let client_id = cursor.read_u8()?;
55 let entry_size = cursor.read_u8()?;
56 let page_bits = cursor.read_u8()?;
57 let num_entries = cursor.read_length(length_size)?;
58 let data_block_address = cursor.read_offset(offset_size)?;
59
60 let header_end = cursor.position();
62 let header_bytes = &data[address as usize..header_end as usize];
63 let stored_checksum = cursor.read_u32_le()?;
64 let computed = jenkins_lookup3(header_bytes);
65 if stored_checksum != computed {
66 return Err(Error::ChecksumMismatch {
67 expected: stored_checksum,
68 actual: computed,
69 });
70 }
71
72 Ok(FaHeader {
73 client_id,
74 entry_size,
75 page_bits,
76 num_entries,
77 data_block_address,
78 })
79}
80
81fn parse_header_storage(
82 storage: &dyn Storage,
83 address: u64,
84 offset_size: u8,
85 length_size: u8,
86) -> Result<FaHeader> {
87 let header_len = 4 + 1 + 1 + 1 + 1 + usize::from(length_size) + usize::from(offset_size) + 4;
88 let bytes = storage.read_range(address, header_len)?;
89 parse_header(bytes.as_ref(), 0, offset_size, length_size)
90}
91
/// One entry as stored in a fixed array data block, before it is mapped to
/// chunk coordinates.
#[derive(Debug)]
struct FaRawEntry {
    /// Address recorded for the chunk.
    address: u64,
    /// Stored chunk size; `0` when the entry layout is unfiltered.
    chunk_size: u64,
    /// Filter mask; `0` when the entry layout is unfiltered.
    filter_mask: u32,
}
99
100fn parse_data_block(
102 data: &[u8],
103 address: u64,
104 header: &FaHeader,
105 offset_size: u8,
106) -> Result<Vec<FaRawEntry>> {
107 let mut cursor = Cursor::new(data);
108 cursor.set_position(address);
109
110 let sig = cursor.read_bytes(4)?;
111 if sig != FADB_SIGNATURE {
112 return Err(Error::InvalidFixedArraySignature {
113 context: "data block signature mismatch",
114 });
115 }
116
117 let version = cursor.read_u8()?;
118 if version != 0 {
119 return Err(Error::Other(format!(
120 "unsupported fixed array data block version {}",
121 version
122 )));
123 }
124
125 let _client_id = cursor.read_u8()?;
126 let _header_address = cursor.read_offset(offset_size)?;
127
128 let num_entries = header.num_entries as usize;
129 let is_filtered = header.client_id == 1;
130
131 let use_paging = header.page_bits > 0 && num_entries > (1usize << header.page_bits);
133
134 if !use_paging {
135 let entries = read_entries(
137 &mut cursor,
138 num_entries,
139 is_filtered,
140 offset_size,
141 header.entry_size,
142 )?;
143 let _checksum = cursor.read_u32_le()?;
145 Ok(entries)
146 } else {
147 let entries_per_page = 1usize << header.page_bits;
149 let num_pages = num_entries.div_ceil(entries_per_page);
150
151 let bitmap_bytes = num_pages.div_ceil(8);
155 let page_bitmap = cursor.read_bytes(bitmap_bytes)?.to_vec();
156
157 let mut all_entries = Vec::with_capacity(num_entries);
158
159 for page_idx in 0..num_pages {
160 let byte_idx = page_idx / 8;
161 let bit_idx = page_idx % 8;
162 let page_initialized =
163 byte_idx < page_bitmap.len() && (page_bitmap[byte_idx] & (1 << bit_idx)) != 0;
164
165 let entries_in_this_page = if page_idx == num_pages - 1 {
166 let remainder = num_entries % entries_per_page;
167 if remainder == 0 {
168 entries_per_page
169 } else {
170 remainder
171 }
172 } else {
173 entries_per_page
174 };
175
176 if page_initialized {
177 let page_entries = read_entries(
178 &mut cursor,
179 entries_in_this_page,
180 is_filtered,
181 offset_size,
182 header.entry_size,
183 )?;
184 let _page_checksum = cursor.read_u32_le()?;
186 all_entries.extend(page_entries);
187 } else {
188 for _ in 0..entries_in_this_page {
190 all_entries.push(FaRawEntry {
191 address: u64::MAX,
192 chunk_size: 0,
193 filter_mask: 0,
194 });
195 }
196 }
197 }
198
199 Ok(all_entries)
200 }
201}
202
203fn read_entries(
205 cursor: &mut Cursor<'_>,
206 count: usize,
207 is_filtered: bool,
208 offset_size: u8,
209 entry_size: u8,
210) -> Result<Vec<FaRawEntry>> {
211 let mut entries = Vec::with_capacity(count);
212 for _ in 0..count {
213 let address = cursor.read_offset(offset_size)?;
214 let (chunk_size, filter_mask) = if is_filtered {
215 let chunk_size_len = entry_size
216 .checked_sub(offset_size)
217 .and_then(|remaining| remaining.checked_sub(4))
218 .ok_or_else(|| Error::InvalidData("invalid fixed array entry size".into()))?;
219 let cs = cursor.read_length(chunk_size_len)?;
220 let fm = cursor.read_u32_le()?;
221 (cs, fm)
222 } else {
223 (0, 0)
224 };
225 entries.push(FaRawEntry {
226 address,
227 chunk_size,
228 filter_mask,
229 });
230 }
231 Ok(entries)
232}
233
234fn read_entry_at(
235 data: &[u8],
236 position: u64,
237 is_filtered: bool,
238 offset_size: u8,
239 entry_size: u8,
240) -> Result<FaRawEntry> {
241 let mut cursor = Cursor::new(data);
242 cursor.set_position(position);
243 let mut entries = read_entries(&mut cursor, 1, is_filtered, offset_size, entry_size)?;
244 entries
245 .pop()
246 .ok_or_else(|| Error::InvalidData("missing fixed array entry".into()))
247}
248
249fn read_entry_at_storage(
250 storage: &dyn Storage,
251 position: u64,
252 is_filtered: bool,
253 offset_size: u8,
254 entry_size: u8,
255) -> Result<FaRawEntry> {
256 let bytes = storage.read_range(position, usize::from(entry_size))?;
257 let mut cursor = Cursor::new(bytes.as_ref());
258 let mut entries = read_entries(&mut cursor, 1, is_filtered, offset_size, entry_size)?;
259 entries
260 .pop()
261 .ok_or_else(|| Error::InvalidData("missing fixed array entry".into()))
262}
263
/// Enumerates the chunks inside `chunk_bounds` (or the whole chunk grid when
/// `None`) in row-major order.
///
/// Each item is `(linear_index, offsets)`: `linear_index` is the row-major
/// index of the chunk in the grid derived from `dataset_shape` and
/// `chunk_dims`, and `offsets` holds the element offset of the chunk along
/// each dimension (`chunk_index * chunk_dim`).
///
/// `chunk_bounds` is an inclusive `(first, last)` pair of per-dimension chunk
/// indices. Bounds reaching past the grid are clamped to it, because an
/// out-of-grid chunk index would otherwise alias the linear index of a
/// different chunk. The result is empty when the clamped range is empty in any
/// dimension or when any dimension of the grid holds no chunks.
///
/// A zero-dimensional dataset yields the single target `(0, [])`.
///
/// # Panics
///
/// Panics if `chunk_dims` or either bounds slice is shorter than
/// `dataset_shape`, or if a chunk dimension is `0`.
fn linear_target_offsets(
    dataset_shape: &[u64],
    chunk_dims: &[u32],
    chunk_bounds: Option<(&[u64], &[u64])>,
) -> Vec<(usize, Vec<u64>)> {
    let ndim = dataset_shape.len();
    if ndim == 0 {
        return vec![(0, Vec::new())];
    }

    let chunks_per_dim: Vec<u64> = (0..ndim)
        .map(|i| dataset_shape[i].div_ceil(u64::from(chunk_dims[i])))
        .collect();

    // A dimension of extent zero means the grid contains no chunks at all.
    if chunks_per_dim.contains(&0) {
        return Vec::new();
    }

    let (first_chunk, last_chunk): (Vec<u64>, Vec<u64>) = match chunk_bounds {
        Some((first, last)) => (
            first[..ndim].to_vec(),
            last[..ndim]
                .iter()
                .zip(&chunks_per_dim)
                .map(|(&last_index, &count)| last_index.min(count - 1))
                .collect(),
        ),
        None => (
            vec![0u64; ndim],
            chunks_per_dim.iter().map(|count| count - 1).collect(),
        ),
    };

    // An inverted range in any dimension selects nothing.
    if first_chunk.iter().zip(&last_chunk).any(|(first, last)| first > last) {
        return Vec::new();
    }

    let mut targets = Vec::new();
    let mut chunk_indices = first_chunk.clone();
    loop {
        let linear_idx = chunk_indices
            .iter()
            .zip(&chunks_per_dim)
            .fold(0u64, |acc, (&index, &count)| acc * count + index);
        let offsets = chunk_indices
            .iter()
            .zip(chunk_dims)
            .map(|(&index, &dim)| index * u64::from(dim))
            .collect();
        targets.push((linear_idx as usize, offsets));

        // Odometer step: bump the fastest-varying dimension that still has
        // room and reset every dimension after it to its lower bound.
        let Some(dim) = (0..ndim)
            .rev()
            .find(|&dim| chunk_indices[dim] < last_chunk[dim])
        else {
            break;
        };
        chunk_indices[dim] += 1;
        chunk_indices[dim + 1..].copy_from_slice(&first_chunk[dim + 1..]);
    }

    targets
}
322
323fn collect_fixed_array_chunk_entries_bounded(
324 data: &[u8],
325 header: &FaHeader,
326 offset_size: u8,
327 dataset_shape: &[u64],
328 chunk_dims: &[u32],
329 chunk_bounds: (&[u64], &[u64]),
330) -> Result<Vec<ChunkEntry>> {
331 let targets = linear_target_offsets(dataset_shape, chunk_dims, Some(chunk_bounds));
332 let mut cursor = Cursor::new(data);
333 cursor.set_position(header.data_block_address);
334
335 let sig = cursor.read_bytes(4)?;
336 if sig != FADB_SIGNATURE {
337 return Err(Error::InvalidFixedArraySignature {
338 context: "data block signature mismatch",
339 });
340 }
341
342 let version = cursor.read_u8()?;
343 if version != 0 {
344 return Err(Error::Other(format!(
345 "unsupported fixed array data block version {}",
346 version
347 )));
348 }
349
350 let _client_id = cursor.read_u8()?;
351 let _header_address = cursor.read_offset(offset_size)?;
352
353 let num_entries = header.num_entries as usize;
354 let is_filtered = header.client_id == 1;
355 let entry_bytes = header.entry_size as usize;
356 let use_paging = header.page_bits > 0 && num_entries > (1usize << header.page_bits);
357
358 if !use_paging {
359 let entries_start = cursor.position();
360 let mut entries = Vec::new();
361 for (linear_idx, offsets) in targets {
362 let position = entries_start + (linear_idx * entry_bytes) as u64;
363 let raw = read_entry_at(data, position, is_filtered, offset_size, header.entry_size)?;
364 if Cursor::is_undefined_offset(raw.address, offset_size) {
365 continue;
366 }
367 entries.push(ChunkEntry {
368 address: raw.address,
369 size: raw.chunk_size,
370 filter_mask: raw.filter_mask,
371 offsets,
372 });
373 }
374 return Ok(entries);
375 }
376
377 let entries_per_page = 1usize << header.page_bits;
378 let num_pages = num_entries.div_ceil(entries_per_page);
379 let bitmap_bytes = num_pages.div_ceil(8);
380 let page_bitmap = cursor.read_bytes(bitmap_bytes)?.to_vec();
381 let pages_start = cursor.position();
382
383 let mut page_offsets = vec![None; num_pages];
384 let mut next_page_start = pages_start;
385 for (page_idx, page_offset) in page_offsets.iter_mut().enumerate().take(num_pages) {
386 let byte_idx = page_idx / 8;
387 let bit_idx = page_idx % 8;
388 let page_initialized =
389 byte_idx < page_bitmap.len() && (page_bitmap[byte_idx] & (1 << bit_idx)) != 0;
390
391 let entries_in_page = if page_idx == num_pages - 1 {
392 let remainder = num_entries % entries_per_page;
393 if remainder == 0 {
394 entries_per_page
395 } else {
396 remainder
397 }
398 } else {
399 entries_per_page
400 };
401
402 if page_initialized {
403 *page_offset = Some(next_page_start);
404 next_page_start += (entries_in_page * entry_bytes + 4) as u64;
405 }
406 }
407
408 let mut entries = Vec::new();
409 for (linear_idx, offsets) in targets {
410 let page_idx = linear_idx / entries_per_page;
411 let within_page = linear_idx % entries_per_page;
412 let Some(page_start) = page_offsets[page_idx] else {
413 continue;
414 };
415 let position = page_start + (within_page * entry_bytes) as u64;
416 let raw = read_entry_at(data, position, is_filtered, offset_size, header.entry_size)?;
417 if Cursor::is_undefined_offset(raw.address, offset_size) {
418 continue;
419 }
420 entries.push(ChunkEntry {
421 address: raw.address,
422 size: raw.chunk_size,
423 filter_mask: raw.filter_mask,
424 offsets,
425 });
426 }
427
428 Ok(entries)
429}
430
431fn collect_fixed_array_chunk_entries_bounded_storage(
432 storage: &dyn Storage,
433 header: &FaHeader,
434 offset_size: u8,
435 dataset_shape: &[u64],
436 chunk_dims: &[u32],
437 chunk_bounds: (&[u64], &[u64]),
438) -> Result<Vec<ChunkEntry>> {
439 let targets = linear_target_offsets(dataset_shape, chunk_dims, Some(chunk_bounds));
440 let block_header_len = 4 + 1 + 1 + usize::from(offset_size);
441 let header_bytes = storage.read_range(header.data_block_address, block_header_len)?;
442 let mut cursor = Cursor::new(header_bytes.as_ref());
443
444 let sig = cursor.read_bytes(4)?;
445 if sig != FADB_SIGNATURE {
446 return Err(Error::InvalidFixedArraySignature {
447 context: "data block signature mismatch",
448 });
449 }
450
451 let version = cursor.read_u8()?;
452 if version != 0 {
453 return Err(Error::Other(format!(
454 "unsupported fixed array data block version {}",
455 version
456 )));
457 }
458
459 let _client_id = cursor.read_u8()?;
460 let _header_address = cursor.read_offset(offset_size)?;
461
462 let num_entries = usize::try_from(header.num_entries).map_err(|_| {
463 Error::InvalidData("fixed array entry count exceeds platform usize capacity".into())
464 })?;
465 let is_filtered = header.client_id == 1;
466 let entry_bytes = usize::from(header.entry_size);
467 let use_paging = header.page_bits > 0 && num_entries > (1usize << header.page_bits);
468 let entries_start = header.data_block_address
469 + u64::try_from(block_header_len)
470 .map_err(|_| Error::OffsetOutOfBounds(header.data_block_address))?;
471
472 if !use_paging {
473 let mut entries = Vec::new();
474 for (linear_idx, offsets) in targets {
475 let position = entries_start
476 + u64::try_from(linear_idx * entry_bytes).map_err(|_| {
477 Error::InvalidData("fixed array entry offset exceeds u64 capacity".into())
478 })?;
479 let raw = read_entry_at_storage(
480 storage,
481 position,
482 is_filtered,
483 offset_size,
484 header.entry_size,
485 )?;
486 if Cursor::is_undefined_offset(raw.address, offset_size) {
487 continue;
488 }
489 entries.push(ChunkEntry {
490 address: raw.address,
491 size: raw.chunk_size,
492 filter_mask: raw.filter_mask,
493 offsets,
494 });
495 }
496 return Ok(entries);
497 }
498
499 let entries_per_page = 1usize << header.page_bits;
500 let num_pages = num_entries.div_ceil(entries_per_page);
501 let bitmap_bytes = num_pages.div_ceil(8);
502 let page_bitmap = storage.read_range(entries_start, bitmap_bytes)?;
503 let pages_start = entries_start
504 + u64::try_from(bitmap_bytes).map_err(|_| {
505 Error::InvalidData("fixed array bitmap size exceeds u64 capacity".into())
506 })?;
507
508 let mut page_offsets = vec![None; num_pages];
509 let mut next_page_start = pages_start;
510 for (page_idx, page_offset) in page_offsets.iter_mut().enumerate().take(num_pages) {
511 let byte_idx = page_idx / 8;
512 let bit_idx = page_idx % 8;
513 let page_initialized =
514 byte_idx < page_bitmap.len() && (page_bitmap[byte_idx] & (1 << bit_idx)) != 0;
515
516 let entries_in_page = if page_idx == num_pages - 1 {
517 let remainder = num_entries % entries_per_page;
518 if remainder == 0 {
519 entries_per_page
520 } else {
521 remainder
522 }
523 } else {
524 entries_per_page
525 };
526
527 if page_initialized {
528 *page_offset = Some(next_page_start);
529 next_page_start += u64::try_from(entries_in_page * entry_bytes + 4).map_err(|_| {
530 Error::InvalidData("fixed array page size exceeds u64 capacity".into())
531 })?;
532 }
533 }
534
535 let mut entries = Vec::new();
536 for (linear_idx, offsets) in targets {
537 let page_idx = linear_idx / entries_per_page;
538 let within_page = linear_idx % entries_per_page;
539 let Some(page_start) = page_offsets[page_idx] else {
540 continue;
541 };
542 let position = page_start
543 + u64::try_from(within_page * entry_bytes).map_err(|_| {
544 Error::InvalidData("fixed array page entry offset exceeds u64 capacity".into())
545 })?;
546 let raw = read_entry_at_storage(
547 storage,
548 position,
549 is_filtered,
550 offset_size,
551 header.entry_size,
552 )?;
553 if Cursor::is_undefined_offset(raw.address, offset_size) {
554 continue;
555 }
556 entries.push(ChunkEntry {
557 address: raw.address,
558 size: raw.chunk_size,
559 filter_mask: raw.filter_mask,
560 offsets,
561 });
562 }
563
564 Ok(entries)
565}
566
567pub fn collect_fixed_array_chunk_entries(
572 data: &[u8],
573 header_address: u64,
574 offset_size: u8,
575 length_size: u8,
576 dataset_shape: &[u64],
577 chunk_dims: &[u32],
578 chunk_bounds: Option<(&[u64], &[u64])>,
579) -> Result<Vec<ChunkEntry>> {
580 let header = parse_header(data, header_address, offset_size, length_size)?;
581
582 if Cursor::is_undefined_offset(header.data_block_address, offset_size) {
583 return Ok(Vec::new());
584 }
585
586 if let Some(bounds) = chunk_bounds {
587 return collect_fixed_array_chunk_entries_bounded(
588 data,
589 &header,
590 offset_size,
591 dataset_shape,
592 chunk_dims,
593 bounds,
594 );
595 }
596
597 let raw_entries = parse_data_block(data, header.data_block_address, &header, offset_size)?;
598
599 let ndim = dataset_shape.len();
600 let chunks_per_dim: Vec<u64> = (0..ndim)
601 .map(|i| dataset_shape[i].div_ceil(chunk_dims[i] as u64))
602 .collect();
603
604 let mut entries = Vec::new();
605 for (linear_idx, raw) in raw_entries.iter().enumerate() {
606 if Cursor::is_undefined_offset(raw.address, offset_size) {
608 continue;
609 }
610
611 let mut remaining = linear_idx as u64;
613 let mut offsets = vec![0u64; ndim];
614 for d in (0..ndim).rev() {
615 offsets[d] = (remaining % chunks_per_dim[d]) * chunk_dims[d] as u64;
616 remaining /= chunks_per_dim[d];
617 }
618
619 if let Some((first_chunk, last_chunk)) = chunk_bounds {
620 let overlaps = offsets.iter().enumerate().all(|(dim, offset)| {
621 let chunk_index = *offset / u64::from(chunk_dims[dim]);
622 chunk_index >= first_chunk[dim] && chunk_index <= last_chunk[dim]
623 });
624 if !overlaps {
625 continue;
626 }
627 }
628
629 entries.push(ChunkEntry {
630 address: raw.address,
631 size: raw.chunk_size,
632 filter_mask: raw.filter_mask,
633 offsets,
634 });
635 }
636
637 Ok(entries)
638}
639
640pub fn collect_fixed_array_chunk_entries_storage(
642 storage: &dyn Storage,
643 header_address: u64,
644 offset_size: u8,
645 length_size: u8,
646 dataset_shape: &[u64],
647 chunk_dims: &[u32],
648 chunk_bounds: Option<(&[u64], &[u64])>,
649) -> Result<Vec<ChunkEntry>> {
650 let header = parse_header_storage(storage, header_address, offset_size, length_size)?;
651
652 if Cursor::is_undefined_offset(header.data_block_address, offset_size) {
653 return Ok(Vec::new());
654 }
655
656 if let Some(bounds) = chunk_bounds {
657 return collect_fixed_array_chunk_entries_bounded_storage(
658 storage,
659 &header,
660 offset_size,
661 dataset_shape,
662 chunk_dims,
663 bounds,
664 );
665 }
666
667 let num_entries = usize::try_from(header.num_entries).map_err(|_| {
668 Error::InvalidData("fixed array entry count exceeds platform usize capacity".into())
669 })?;
670 let is_filtered = header.client_id == 1;
671 let header_len = 4 + 1 + 1 + usize::from(offset_size);
672 let use_paging = header.page_bits > 0 && num_entries > (1usize << header.page_bits);
673 let block_len = if !use_paging {
674 header_len + num_entries * usize::from(header.entry_size) + 4
675 } else {
676 let entries_per_page = 1usize << header.page_bits;
677 let num_pages = num_entries.div_ceil(entries_per_page);
678 let bitmap_bytes = num_pages.div_ceil(8);
679 let mut len = header_len + bitmap_bytes;
680 for page_idx in 0..num_pages {
681 let entries_in_page = if page_idx == num_pages - 1 {
682 let remainder = num_entries % entries_per_page;
683 if remainder == 0 {
684 entries_per_page
685 } else {
686 remainder
687 }
688 } else {
689 entries_per_page
690 };
691 len += entries_in_page * usize::from(header.entry_size) + 4;
692 }
693 len
694 };
695 let block = storage.read_range(header.data_block_address, block_len)?;
696 let raw_entries = parse_data_block(block.as_ref(), 0, &header, offset_size)?;
697
698 let ndim = dataset_shape.len();
699 let chunks_per_dim: Vec<u64> = (0..ndim)
700 .map(|i| dataset_shape[i].div_ceil(chunk_dims[i] as u64))
701 .collect();
702
703 let mut entries = Vec::new();
704 for (linear_idx, raw) in raw_entries.iter().enumerate() {
705 if Cursor::is_undefined_offset(raw.address, offset_size) {
706 continue;
707 }
708 let mut remaining = linear_idx as u64;
709 let mut offsets = vec![0u64; ndim];
710 for d in (0..ndim).rev() {
711 offsets[d] = (remaining % chunks_per_dim[d]) * chunk_dims[d] as u64;
712 remaining /= chunks_per_dim[d];
713 }
714
715 if let Some((first_chunk, last_chunk)) = chunk_bounds {
716 let overlaps = offsets.iter().enumerate().all(|(dim, offset)| {
717 let chunk_index = *offset / u64::from(chunk_dims[dim]);
718 chunk_index >= first_chunk[dim] && chunk_index <= last_chunk[dim]
719 });
720 if !overlaps {
721 continue;
722 }
723 }
724
725 entries.push(ChunkEntry {
726 address: raw.address,
727 size: if is_filtered { raw.chunk_size } else { 0 },
728 filter_mask: raw.filter_mask,
729 offsets,
730 });
731 }
732
733 Ok(entries)
734}
735
#[cfg(test)]
mod tests {
    use super::*;

    /// A header that does not begin with `FAHD` is rejected with a signature error.
    #[test]
    fn test_fahd_bad_signature() {
        let mut buf = [0u8; 64];
        buf[..4].copy_from_slice(b"XXXX");

        let outcome = parse_header(&buf, 0, 8, 8);

        assert!(matches!(
            outcome,
            Err(Error::InvalidFixedArraySignature { .. })
        ));
    }

    /// A data block that does not begin with `FADB` is rejected with a signature error.
    #[test]
    fn test_fadb_bad_signature() {
        let mut buf = [0u8; 64];
        buf[..4].copy_from_slice(b"XXXX");
        let hdr = FaHeader {
            client_id: 0,
            entry_size: 8,
            page_bits: 0,
            num_entries: 1,
            data_block_address: 0,
        };

        let outcome = parse_data_block(&buf, 0, &hdr, 8);

        assert!(matches!(
            outcome,
            Err(Error::InvalidFixedArraySignature { .. })
        ));
    }
}