#[cfg(not(feature = "std"))]
use crate::io::{Cursor, Read, Seek, SeekFrom};
use crate::io::{LittleEndian, ReadBytesExt};
use crate::path::{Path, PathBuf};
use crate::{
encryption::EncryptionProvider,
fs::{Fs, FsFile, FsOpenOptions},
manifest_blocks::{
HEAD_FOOTER_RESERVED_SIZE, MANIFEST_TABLE_ID_SENTINEL, MAX_MANIFEST_BLOCK_SIZE,
TAIL_FOOTER_SIZE_HINT_BYTES,
footer::{FooterPayload, TocEntry},
},
runtime_config::RuntimeConfig,
table::block::{Block, BlockIdentity, BlockTransform, BlockType, Header},
};
use alloc::sync::Arc;
#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, vec::Vec};
#[cfg(feature = "std")]
use std::io::{Cursor, Read, Seek, SeekFrom};
/// Reader over a manifest archive file.
///
/// Holds the open file handle together with the decoded footer payload, so
/// that individual sections can be looked up by name and read on demand.
pub struct ManifestArchiveReader {
/// Path the archive was opened from (kept for diagnostics and `path()`).
path: PathBuf,
/// Open handle used for all subsequent section reads.
file: Box<dyn FsFile>,
/// Decoded footer payload; section lookups by name go through it.
footer: FooterPayload,
/// Which on-disk footer copy (`Tail` or `Head`) produced `footer`.
source: FooterSource,
/// Runtime configuration, consulted when building the block transform.
runtime: Arc<RuntimeConfig>,
/// Optional encryption provider, passed into the block transform when set.
encryption: Option<Arc<dyn EncryptionProvider>>,
}
/// Manual `Debug` that prints only the path, footer source and footer.
///
/// The remaining fields (`file`, `runtime`, `encryption`) are left out and
/// the output is marked as non-exhaustive.
impl core::fmt::Debug for ManifestArchiveReader {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let Self {
            path,
            footer,
            source,
            ..
        } = self;

        let mut out = f.debug_struct("ManifestArchiveReader");
        out.field("path", path);
        out.field("source", source);
        out.field("footer", footer);
        out.finish_non_exhaustive()
    }
}
/// Identifies which on-disk copy of the footer a reader was opened from.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum FooterSource {
/// The footer located at the end of the file via the trailing size hint.
Tail,
/// The mirror copy stored in the reserved region at the start of the file.
Head,
}
impl ManifestArchiveReader {
pub fn open(
path: &Path,
fs: &dyn Fs,
runtime: Arc<RuntimeConfig>,
encryption: Option<Arc<dyn EncryptionProvider>>,
) -> crate::Result<Self> {
let mut file = fs.open(path, &FsOpenOptions::new().read(true))?;
let file_len = file_size(fs, path)?;
if file_len < HEAD_FOOTER_RESERVED_SIZE {
return Err(crate::Error::Unrecoverable);
}
let footer_transform = build_transform(&runtime, encryption.as_deref());
let tail_err = if file_len < HEAD_FOOTER_RESERVED_SIZE + TAIL_FOOTER_SIZE_HINT_BYTES {
log::debug!(
"manifest {} is too short for a tail size hint ({} bytes); skipping tail path, trying head mirror",
path.display(),
file_len,
);
crate::Error::ManifestFooterInvalid(
"tail size hint absent — file truncated to head reservation only",
)
} else {
match read_tail_footer(&mut file, file_len, &footer_transform) {
Ok(footer) => {
return Ok(Self {
path: path.to_path_buf(),
file,
footer,
source: FooterSource::Tail,
runtime,
encryption,
});
}
Err(err) => {
log::debug!(
"manifest tail footer read failed for {}: {err:?}; trying head mirror",
path.display(),
);
err
}
}
};
match read_head_footer(&mut file, &footer_transform) {
Ok(footer) => Ok(Self {
path: path.to_path_buf(),
file,
footer,
source: FooterSource::Head,
runtime,
encryption,
}),
Err(head_err) => {
log::error!(
"manifest unrecoverable for {}: tail err = {:?}, head err = {head_err:?}",
path.display(),
tail_err,
);
Err(crate::Error::ManifestFooterInvalid(
"both tail and head mirror failed to produce a valid footer payload",
))
}
}
}
#[must_use]
pub fn source(&self) -> FooterSource {
self.source
}
#[must_use]
pub fn footer(&self) -> &FooterPayload {
&self.footer
}
#[must_use]
pub fn path(&self) -> &Path {
&self.path
}
#[must_use]
pub fn section(&self, name: &str) -> Option<&TocEntry> {
self.footer.section(name)
}
pub fn read_section(&mut self, name: &str) -> crate::Result<Vec<u8>> {
let entry = self
.footer
.section(name)
.ok_or(crate::Error::ManifestSectionInvalid(
"requested section name not in TOC",
))?;
let block_offset = entry.block_offset;
let block_size = entry.block_size;
let expected_section_checksum = entry.section_checksum;
if block_size > MAX_MANIFEST_BLOCK_SIZE {
return Err(crate::Error::ManifestFooterInvalid(
"TOC entry block_size exceeds MAX_MANIFEST_BLOCK_SIZE",
));
}
let file_len = self.file.metadata()?.len;
let end = block_offset.checked_add(u64::from(block_size)).ok_or(
crate::Error::ManifestFooterInvalid("TOC entry overflows u64 file offset"),
)?;
if end > file_len {
return Err(crate::Error::ManifestFooterInvalid(
"TOC entry extends past end of manifest file",
));
}
self.file.seek(SeekFrom::Start(block_offset))?;
let mut block_bytes = vec![0u8; block_size as usize];
self.file.read_exact(&mut block_bytes)?;
validate_block_header_fits(&block_bytes, HeaderContext::SectionExact)?;
let identity = BlockIdentity {
table_id: MANIFEST_TABLE_ID_SENTINEL,
block_type: BlockType::Manifest,
dict_id: 0,
window_log: 0,
};
let block = Block::from_reader(
&mut Cursor::new(&block_bytes),
identity,
&build_transform(&self.runtime, self.encryption.as_deref()),
)?;
if block.header.block_type != BlockType::Manifest {
return Err(crate::Error::ManifestSectionInvalid(
"TOC entry points at non-Manifest block",
));
}
if block.header.checksum.into_u128() != expected_section_checksum {
return Err(crate::Error::ManifestSectionInvalid(
"section Block checksum does not match TOC entry section_checksum",
));
}
Ok(block.data.to_vec())
}
}
/// Describes the kind of slot a block buffer was read from, which decides
/// both the size rule applied by `validate_block_header_fits` and the error
/// variant it reports.
#[derive(Copy, Clone)]
enum HeaderContext {
/// A section block slot; the declared size must equal the buffer length.
/// Failures are reported as `ManifestSectionInvalid`.
SectionExact,
/// A footer slot of exact size; the declared size must equal the buffer
/// length. Failures are reported as `ManifestFooterInvalid`.
FooterExact,
/// A footer slot that may be larger than the block; bytes past the
/// declared size must all be zero. Failures are reported as
/// `ManifestFooterInvalid`.
FooterPadded,
}
fn validate_block_header_fits(buf: &[u8], ctx: HeaderContext) -> crate::Result<()> {
use crate::coding::Decode;
let wrap = |msg: &'static str| -> crate::Error {
match ctx {
HeaderContext::SectionExact => crate::Error::ManifestSectionInvalid(msg),
HeaderContext::FooterExact | HeaderContext::FooterPadded => {
crate::Error::ManifestFooterInvalid(msg)
}
}
};
if buf.len() < Header::MIN_LEN {
return Err(wrap("manifest Block buffer shorter than Block header"));
}
let cursor_end = Header::MAX_LEN.min(buf.len());
let mut cursor = Cursor::new(
buf.get(..cursor_end)
.ok_or_else(|| wrap("manifest Block header slice unexpectedly short"))?,
);
let header = Header::decode_from(&mut cursor).map_err(|_| {
wrap("manifest Block header decode failed (truncated, unknown type, or invalid magic)")
})?;
let declared = u64::from(header.on_disk_size());
let buf_len = buf.len() as u64;
match ctx {
HeaderContext::SectionExact | HeaderContext::FooterExact => {
if declared != buf_len {
return Err(wrap(
"manifest Block header on-disk size does not match its exact slot",
));
}
}
HeaderContext::FooterPadded => {
if declared > buf_len {
return Err(wrap(
"manifest Block header declares on-disk size larger than buffer",
));
}
#[expect(
clippy::cast_possible_truncation,
reason = "declared <= buf_len = buf.len(), so it fits usize"
)]
let declared_usize = declared as usize;
if buf
.get(declared_usize..)
.unwrap_or(&[])
.iter()
.any(|&b| b != 0)
{
return Err(wrap(
"manifest head-mirror footer has non-zero bytes past the declared block size",
));
}
}
}
Ok(())
}
/// Selects the `BlockTransform` used to decode manifest blocks.
///
/// The choice depends on two inputs: whether an encryption provider was
/// supplied, and (only when the `page_ecc` feature is compiled in) whether
/// `runtime.manifest_ecc()` is on. The returned transform borrows the
/// provider for `'a`.
fn build_transform<'a>(
runtime: &RuntimeConfig,
encryption: Option<&'a dyn EncryptionProvider>,
) -> BlockTransform<'a> {
// With `page_ecc` compiled in, the runtime config decides whether ECC is on.
#[cfg(feature = "page_ecc")]
let ecc_on = runtime.manifest_ecc();
// Without `page_ecc`, `runtime` is unused; discard it explicitly and force ECC off.
#[cfg(not(feature = "page_ecc"))]
let _ = runtime;
#[cfg(not(feature = "page_ecc"))]
let ecc_on = false;
match (ecc_on, encryption) {
// ECC variants exist only when `page_ecc` is compiled in; both use
// `EccParams::RS_4_2`.
#[cfg(feature = "page_ecc")]
(true, Some(enc)) => {
BlockTransform::EncryptedEcc(enc, crate::table::block::EccParams::RS_4_2)
}
#[cfg(feature = "page_ecc")]
(true, None) => BlockTransform::PlainEcc(crate::table::block::EccParams::RS_4_2),
// Remaining cases: ECC off, or the ECC arms are compiled out.
(_, Some(enc)) => BlockTransform::Encrypted(enc),
(_, None) => BlockTransform::PLAIN,
}
}
fn read_tail_footer(
file: &mut Box<dyn FsFile>,
file_len: u64,
transform: &BlockTransform<'_>,
) -> crate::Result<FooterPayload> {
file.seek(SeekFrom::Start(file_len - TAIL_FOOTER_SIZE_HINT_BYTES))?;
let footer_size = u64::from(file.read_u32::<LittleEndian>()?);
let max_footer = file_len
.checked_sub(HEAD_FOOTER_RESERVED_SIZE)
.and_then(|len_after_head| len_after_head.checked_sub(TAIL_FOOTER_SIZE_HINT_BYTES))
.ok_or(crate::Error::ManifestFooterInvalid(
"file too small to hold footer between head reservation and size hint",
))?;
if footer_size == 0 || footer_size > max_footer {
return Err(crate::Error::ManifestFooterInvalid(
"trailing footer-size hint out of bounds",
));
}
if footer_size > HEAD_FOOTER_RESERVED_SIZE {
return Err(crate::Error::ManifestFooterInvalid(
"footer-size hint exceeds HEAD_FOOTER_RESERVED_SIZE",
));
}
let footer_offset = file_len
.checked_sub(TAIL_FOOTER_SIZE_HINT_BYTES)
.and_then(|x| x.checked_sub(footer_size))
.ok_or(crate::Error::ManifestFooterInvalid(
"trailing footer-size hint underflows file length",
))?;
file.seek(SeekFrom::Start(footer_offset))?;
#[expect(
clippy::cast_possible_truncation,
reason = "footer_size <= HEAD_FOOTER_RESERVED_SIZE = 4 KiB, fits any platform's usize"
)]
let mut footer_buf = vec![0u8; footer_size as usize];
file.read_exact(&mut footer_buf)?;
validate_block_header_fits(&footer_buf, HeaderContext::FooterExact)?;
let identity = BlockIdentity {
table_id: MANIFEST_TABLE_ID_SENTINEL,
block_type: BlockType::ManifestFooter,
dict_id: 0,
window_log: 0,
};
let block = Block::from_reader(&mut Cursor::new(&footer_buf), identity, transform)?;
if block.header.block_type != BlockType::ManifestFooter {
return Err(crate::Error::ManifestFooterInvalid(
"tail footer slot decoded as non-ManifestFooter block",
));
}
FooterPayload::decode(&block.data[..])
}
/// Reads and decodes the footer mirror stored in the reserved region at the
/// start of the manifest file.
///
/// The first `HEAD_FOOTER_RESERVED_SIZE` bytes are read in full. An all-zero
/// region is treated as "no mirror written". Otherwise the block header is
/// validated as a padded slot (trailing bytes must be zero) and the block is
/// decoded as a `ManifestFooter`.
///
/// # Errors
///
/// Returns `Error::ManifestFooterInvalid` when the region is all zeroes, the
/// header does not fit the padded slot, or the decoded block has the wrong
/// type. I/O, block-decoding and payload-decoding errors are propagated.
fn read_head_footer(
    file: &mut Box<dyn FsFile>,
    transform: &BlockTransform<'_>,
) -> crate::Result<FooterPayload> {
    file.seek(SeekFrom::Start(0))?;
    #[expect(
        clippy::cast_possible_truncation,
        reason = "HEAD_FOOTER_RESERVED_SIZE = 4 KiB, fits in usize on every supported target"
    )]
    let mut mirror = vec![0u8; HEAD_FOOTER_RESERVED_SIZE as usize];
    file.read_exact(&mut mirror)?;

    // A region without a single non-zero byte means the mirror was never written.
    let mirror_is_blank = !mirror.iter().any(|&byte| byte != 0);
    if mirror_is_blank {
        return Err(crate::Error::ManifestFooterInvalid(
            "head mirror unpopulated (manifest_footer_mirror was off at write)",
        ));
    }

    validate_block_header_fits(&mirror, HeaderContext::FooterPadded)?;

    let expected_type = BlockType::ManifestFooter;
    let identity = BlockIdentity {
        table_id: MANIFEST_TABLE_ID_SENTINEL,
        block_type: expected_type,
        dict_id: 0,
        window_log: 0,
    };
    let mut reader = Cursor::new(&mirror);
    let block = Block::from_reader(&mut reader, identity, transform)?;
    if block.header.block_type != BlockType::ManifestFooter {
        return Err(crate::Error::ManifestFooterInvalid(
            "head mirror slot decoded as non-ManifestFooter block",
        ));
    }

    let payload_bytes = &block.data[..];
    FooterPayload::decode(payload_bytes)
}
/// Returns the length in bytes of the file at `path`, as reported by the
/// filesystem's path-based metadata lookup.
///
/// # Errors
///
/// Propagates any error from `fs.metadata`.
fn file_size(fs: &dyn Fs, path: &Path) -> crate::Result<u64> {
    let meta = fs.metadata(path)?;
    Ok(meta.len)
}
// Unit tests for the manifest archive reader. They run against the in-memory
// filesystem (`MemFs`) and produce archives with `ManifestArchiveWriter`.
#[cfg(test)]
#[expect(
clippy::unwrap_used,
clippy::expect_used,
clippy::items_after_statements,
reason = "tests panic on failure paths to surface bugs loudly; \
localized `use std::io::Write;` reads natural at the call site"
)]
mod tests {
use super::*;
use crate::{
fs::MemFs, manifest_blocks::writer::ManifestArchiveWriter, runtime_config::RuntimeConfig,
};
use std::sync::Arc;
// Creates an empty in-memory filesystem with the `/m` directory the tests write into.
fn fresh_fs() -> MemFs {
let fs = MemFs::new();
fs.create_dir_all(Path::new("/m")).unwrap();
fs
}
// A buffer that carries extra non-zero bytes after the block must be rejected
// in every context; the same layout with zero bytes in their place is
// accepted only by the padded context.
#[test]
fn validate_block_header_fits_rejects_understated_exact_slot() {
use crate::coding::Encode;
use crate::table::block::{BlockType, Header};
let header = Header {
data_length: 4,
uncompressed_length: 4,
..Header::test_dummy(BlockType::ManifestFooter)
};
// Encoded header + 4 payload bytes + 2 trailing non-zero bytes.
// NOTE(review): presumably the header's on-disk size covers header + 4 payload
// bytes, making the buffer 2 bytes longer than declared — confirm against `Header`.
let mut buf = header.encode_into_vec();
buf.extend_from_slice(&[0u8; 4]); buf.extend_from_slice(&[0xAB, 0xCD]);
assert!(
validate_block_header_fits(&buf, HeaderContext::SectionExact).is_err(),
"an understated header in an exact-fit section slot must be rejected",
);
assert!(
validate_block_header_fits(&buf, HeaderContext::FooterExact).is_err(),
"an understated header in an exact-fit footer slot must be rejected",
);
assert!(
validate_block_header_fits(&buf, HeaderContext::FooterPadded).is_err(),
"padded slot must reject non-zero bytes past the declared block size",
);
// Same layout, but the 2 trailing bytes are zero.
let mut zero_padded = header.encode_into_vec();
zero_padded.extend_from_slice(&[0u8; 4]); zero_padded.extend_from_slice(&[0u8; 2]); assert!(
validate_block_header_fits(&zero_padded, HeaderContext::FooterPadded).is_ok(),
"padded slot accepts a smaller declared size with genuine zero padding",
);
}
// Writes an unencrypted manifest at `path` containing the given
// `(section name, section bytes)` pairs, in order.
fn write_manifest(fs: &MemFs, path: &Path, runtime: RuntimeConfig, sections: &[(&str, &[u8])]) {
let mut w = ManifestArchiveWriter::create(
path,
fs,
Arc::new(runtime),
None,
crate::fs::SyncMode::Normal,
)
.unwrap();
for (name, data) in sections {
w.start(name).unwrap();
use std::io::Write;
w.write_all(data).unwrap();
}
w.finish().unwrap();
}
// An untouched manifest opens through the tail footer and exposes its TOC entries.
#[test]
fn reader_opens_clean_manifest_via_tail() {
let fs = fresh_fs();
let path = Path::new("/m/clean");
write_manifest(
&fs,
path,
RuntimeConfig::default(),
&[("format_version", &[5]), ("tree_type", &[0])],
);
let reader = ManifestArchiveReader::open(
path,
&fs,
std::sync::Arc::new(crate::runtime_config::RuntimeConfig::default()),
None,
)
.unwrap();
assert_eq!(reader.source(), FooterSource::Tail);
assert!(reader.section("format_version").is_some());
assert!(reader.section("tree_type").is_some());
assert!(reader.section("nonexistent").is_none());
}
// Section payloads come back exactly as they were written.
#[test]
fn reader_reads_section_bytes_verbatim() {
let fs = fresh_fs();
let path = Path::new("/m/roundtrip");
write_manifest(
&fs,
path,
RuntimeConfig::default(),
&[
("format_version", &[5]),
("comparator_name", b"u64-big-endian"),
],
);
let mut reader = ManifestArchiveReader::open(
path,
&fs,
std::sync::Arc::new(crate::runtime_config::RuntimeConfig::default()),
None,
)
.unwrap();
assert_eq!(reader.read_section("format_version").unwrap(), vec![5]);
assert_eq!(
reader.read_section("comparator_name").unwrap(),
b"u64-big-endian".to_vec(),
);
}
// With the tail corrupted and the default runtime config, `open` must
// succeed through the head mirror.
#[test]
fn reader_falls_back_to_head_mirror_when_tail_corrupt() {
let fs = fresh_fs();
let path = Path::new("/m/tail_corrupt");
write_manifest(
&fs,
path,
RuntimeConfig::default(), &[("format_version", &[5])],
);
let mut file = fs
.open(path, &FsOpenOptions::new().write(true).read(true))
.unwrap();
let size = file.metadata().unwrap().len;
// Overwrite the final 4 bytes with 0xFF.
// NOTE(review): assumes TAIL_FOOTER_SIZE_HINT_BYTES is 4, so this clobbers
// exactly the trailing size hint — confirm.
file.seek(SeekFrom::Start(size - 4)).unwrap();
use std::io::Write;
file.write_all(&[0xFF, 0xFF, 0xFF, 0xFF]).unwrap();
file.sync_all().unwrap();
drop(file);
let reader = ManifestArchiveReader::open(
path,
&fs,
std::sync::Arc::new(crate::runtime_config::RuntimeConfig::default()),
None,
)
.unwrap();
assert_eq!(
reader.source(),
FooterSource::Head,
"reader should have fallen back to the head mirror"
);
assert!(reader.section("format_version").is_some());
}
// With the mirror disabled at write time and the tail corrupted, `open`
// must fail with `ManifestFooterInvalid`.
#[test]
fn reader_fails_when_tail_corrupt_and_no_mirror() {
let fs = fresh_fs();
let path = Path::new("/m/tail_corrupt_no_mirror");
let runtime = RuntimeConfig {
manifest_footer_mirror: false,
..RuntimeConfig::default()
};
write_manifest(&fs, path, runtime, &[("format_version", &[5])]);
let mut file = fs
.open(path, &FsOpenOptions::new().write(true).read(true))
.unwrap();
let size = file.metadata().unwrap().len;
// Same tail corruption as above: final 4 bytes overwritten with 0xFF.
file.seek(SeekFrom::Start(size - 4)).unwrap();
use std::io::Write;
file.write_all(&[0xFF, 0xFF, 0xFF, 0xFF]).unwrap();
file.sync_all().unwrap();
drop(file);
let err = ManifestArchiveReader::open(
path,
&fs,
std::sync::Arc::new(crate::runtime_config::RuntimeConfig::default()),
None,
)
.expect_err("must reject");
assert!(matches!(err, crate::Error::ManifestFooterInvalid(_)));
}
// Asking for a section name that is not in the TOC yields `ManifestSectionInvalid`.
#[test]
fn reader_rejects_request_for_missing_section() {
let fs = fresh_fs();
let path = Path::new("/m/missing");
write_manifest(&fs, path, RuntimeConfig::default(), &[("a", &[1])]);
let mut reader = ManifestArchiveReader::open(
path,
&fs,
std::sync::Arc::new(crate::runtime_config::RuntimeConfig::default()),
None,
)
.unwrap();
let err = reader
.read_section("does_not_exist")
.expect_err("missing section must error");
assert!(matches!(err, crate::Error::ManifestSectionInvalid(_)));
}
// Flipping one byte in section `b` must make only `b` unreadable; sections
// `a` and `c` still round-trip.
#[test]
fn reader_isolates_corruption_to_one_section_other_sections_readable() {
let fs = fresh_fs();
let path = Path::new("/m/isolated");
write_manifest(
&fs,
path,
RuntimeConfig::default(),
&[
("a", &[1, 2, 3, 4]),
("b", &[5, 6, 7, 8]),
("c", &[9, 10, 11, 12]),
],
);
// Look up where section `b`'s block starts, using a short-lived reader.
let b_offset = {
let reader =
ManifestArchiveReader::open(path, &fs, Arc::new(RuntimeConfig::default()), None)
.unwrap();
let entry = reader.section("b").expect("b section is in TOC");
entry.block_offset
};
// First byte after the block header of section `b`.
let payload_off =
b_offset + Header::header_len(crate::table::block::BlockType::Manifest) as u64;
{
let mut file = fs
.open(path, &FsOpenOptions::new().write(true).read(true))
.unwrap();
// Read the byte, then write it back with every bit inverted.
file.seek(SeekFrom::Start(payload_off)).unwrap();
let mut byte = [0u8; 1];
file.read_exact(&mut byte).unwrap();
file.seek(SeekFrom::Start(payload_off)).unwrap();
file.write_all(&[byte[0] ^ 0xFF]).unwrap();
file.sync_all().unwrap();
}
let mut reader =
ManifestArchiveReader::open(path, &fs, Arc::new(RuntimeConfig::default()), None)
.unwrap();
let a_bytes = reader.read_section("a").unwrap();
assert_eq!(a_bytes, vec![1, 2, 3, 4], "section a survives");
let c_bytes = reader.read_section("c").unwrap();
assert_eq!(c_bytes, vec![9, 10, 11, 12], "section c survives");
// Only the fact that `b` fails is asserted; the error variant is just logged.
let b_err = reader
.read_section("b")
.expect_err("section b decoded but should have failed XXH3");
log::debug!("section b corruption surfaced as: {b_err:?}");
}
// A 100-byte file is rejected up front with `Error::Unrecoverable`.
#[test]
fn reader_rejects_files_smaller_than_head_reservation() {
let fs = fresh_fs();
let path = Path::new("/m/too_small");
let mut file = fs
.open(path, &FsOpenOptions::new().write(true).create_new(true))
.unwrap();
use std::io::Write;
file.write_all(&[0u8; 100]).unwrap(); file.sync_all().unwrap();
drop(file);
let err = ManifestArchiveReader::open(
path,
&fs,
std::sync::Arc::new(crate::runtime_config::RuntimeConfig::default()),
None,
)
.expect_err("must reject");
assert!(matches!(err, crate::Error::Unrecoverable));
}
// Head-mirror fallback also works when the manifest was written with an
// encryption provider and is opened with the same one.
#[cfg(feature = "encryption")]
#[test]
fn reader_falls_back_to_head_mirror_for_encrypted_manifest() {
use crate::encryption::{Aes256GcmProvider, EncryptionProvider};
let fs = fresh_fs();
let path = Path::new("/m/enc_tail_corrupt");
let key = [42u8; 32];
let enc: Arc<dyn EncryptionProvider> = Arc::new(Aes256GcmProvider::new(&key));
let mut w = ManifestArchiveWriter::create(
path,
&fs,
Arc::new(RuntimeConfig::default()),
Some(Arc::clone(&enc)),
crate::fs::SyncMode::Normal,
)
.unwrap();
w.start("format_version").unwrap();
use std::io::Write;
w.write_all(&[5u8]).unwrap();
w.finish().unwrap();
let mut file = fs
.open(path, &FsOpenOptions::new().write(true).read(true))
.unwrap();
let size = file.metadata().unwrap().len;
// Corrupt the tail by overwriting the final 4 bytes with 0xFF.
file.seek(SeekFrom::Start(size - 4)).unwrap();
file.write_all(&[0xFF, 0xFF, 0xFF, 0xFF]).unwrap();
file.sync_all().unwrap();
drop(file);
let reader =
ManifestArchiveReader::open(path, &fs, Arc::new(RuntimeConfig::default()), Some(enc))
.expect("encrypted head-mirror fallback must decrypt cleanly");
assert_eq!(
reader.source(),
FooterSource::Head,
"reader should have fallen back to the head mirror"
);
assert!(reader.section("format_version").is_some());
}
// A file truncated to exactly the head reservation (no tail hint at all)
// still opens through the head mirror.
#[test]
fn reader_recovers_from_head_when_tail_hint_missing() {
let fs = fresh_fs();
let path = Path::new("/m/head_only");
write_manifest(
&fs,
path,
RuntimeConfig::default(),
&[("format_version", &[5])],
);
let file = fs
.open(path, &FsOpenOptions::new().write(true).read(true))
.unwrap();
// Cut the file down to the head reservation only.
file.set_len(HEAD_FOOTER_RESERVED_SIZE).unwrap();
file.sync_all().unwrap();
drop(file);
let reader =
ManifestArchiveReader::open(path, &fs, Arc::new(RuntimeConfig::default()), None)
.expect("head-only manifest must recover via head fallback");
assert_eq!(
reader.source(),
FooterSource::Head,
"reader should have fallen back to the head mirror"
);
assert!(reader.section("format_version").is_some());
}
// With `page_ecc: true` in the runtime config on both the write and read
// side, sections still round-trip unchanged.
#[cfg(feature = "page_ecc")]
#[test]
fn reader_reads_section_when_manifest_ecc_enabled_returns_verbatim_bytes() {
let fs = fresh_fs();
let path = Path::new("/m/ecc_plain");
let runtime = RuntimeConfig {
page_ecc: true,
..RuntimeConfig::default()
};
write_manifest(
&fs,
path,
runtime.clone(),
&[("format_version", &[5]), ("tree_type", &[0])],
);
let mut reader = ManifestArchiveReader::open(path, &fs, Arc::new(runtime), None).unwrap();
assert_eq!(reader.read_section("format_version").unwrap(), vec![5]);
assert_eq!(reader.read_section("tree_type").unwrap(), vec![0]);
}
// Same round-trip with both `page_ecc: true` and an encryption provider.
#[cfg(all(feature = "page_ecc", feature = "encryption"))]
#[test]
fn reader_reads_encrypted_section_when_manifest_ecc_enabled_returns_verbatim_bytes() {
use crate::encryption::{Aes256GcmProvider, EncryptionProvider};
let fs = fresh_fs();
let path = Path::new("/m/ecc_enc");
let enc: Arc<dyn EncryptionProvider> = Arc::new(Aes256GcmProvider::new(&[7u8; 32]));
let runtime = RuntimeConfig {
page_ecc: true,
..RuntimeConfig::default()
};
let mut w = ManifestArchiveWriter::create(
path,
&fs,
Arc::new(runtime.clone()),
Some(Arc::clone(&enc)),
crate::fs::SyncMode::Normal,
)
.unwrap();
w.start("format_version").unwrap();
use std::io::Write;
w.write_all(&[5u8]).unwrap();
w.finish().unwrap();
let mut reader =
ManifestArchiveReader::open(path, &fs, Arc::new(runtime), Some(enc)).unwrap();
assert_eq!(reader.read_section("format_version").unwrap(), vec![5]);
}
}