1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
// SPDX-License-Identifier: Apache-2.0
// Copyright (c) 2026-present, Structured World Foundation
//! Blocks-based manifest framing (V5-2, #297).
//!
//! Replaces the upstream `sfa` sectioned-archive file format for the
//! per-version manifest files (`v{N}`). Each manifest is a sequence
//! of standard lsm-tree [`Block`](crate::table::block::Block)s:
//!
//! ```text
//! file layout (manifest_layout_version = 1):
//!   [0 .. HEAD_FOOTER_RESERVED_SIZE]   head footer mirror (4 KiB,
//!                                      zero-padded; populated only when
//!                                      runtime `manifest_footer_mirror`
//!                                      is enabled)
//!   [HEAD_FOOTER_RESERVED_SIZE ..]     section Block 0
//!                                      section Block 1
//!                                      ...
//!                                      section Block N
//!   [.. EOF]                           tail footer Block (primary read
//!                                      target; carries the TOC of
//!                                      section offsets and the manifest
//!                                      layout version)
//! ```
//!
//! All Block-level protections (XXH3-128 checksum, optional ECC, optional
//! AEAD) apply through the standard [`Block::write_into`] /
//! [`Block::from_reader`] pipeline.
//! The manifest therefore gets bit-rot defence, (optional) encryption, and
//! (optional) single-block recovery "for free" by reusing existing
//! infrastructure.
//!
//! Section names mirror the previous sfa archive's section names so
//! existing callers in `crate::Manifest` / `crate::version::recovery`
//! see the same logical surface during the migration; only the underlying
//! framing changes.
//!
//! [`Block::write_into`]: crate::table::block::Block::write_into
//! [`Block::from_reader`]: crate::table::block::Block::from_reader
/// Layout version of the manifest file, stored in the footer payload.
///
/// This number changes only when the manifest file layout itself
/// changes (footer fields, TOC encoding, head-mirror geometry). It is
/// independent of the crate-level [`crate::FormatVersion`], which
/// tracks the block / SST layout.
///
/// ## Amendment policy (same rule as [`crate::FormatVersion`])
///
/// - **Before release:** as long as no published binary writes this
///   value, the on-disk bytes under it MAY be amended in place.
/// - **After release:** once the value has shipped to crates.io,
///   **any** later change to the on-disk manifest layout under it is
///   breaking — even an otherwise additive one — and MUST introduce a
///   new `MANIFEST_LAYOUT_VERSION_V2` constant (then `_V3`, `_V4`, ...,
///   incrementing the numeric suffix for every further post-release
///   break).
///
/// The CURRENT pointer's canonical digest binds this value, so a
/// layout-only break is detected at recovery without requiring a
/// [`crate::FormatVersion`] bump.
pub const MANIFEST_LAYOUT_VERSION_V1: u8 = 1;
/// Number of bytes reserved at file offset 0 for the head footer mirror.
///
/// The reservation is 4 KiB, which matches the typical filesystem
/// block size and gives page alignment for direct-IO compatibility.
///
/// It is also the hard limit on the footer Block size: the footer
/// encode path has a safety-net check that rejects payloads that
/// would overflow this region. Hitting that limit signals a writer
/// bug or a forged manifest rather than legitimate capacity
/// exhaustion — realistic production manifests use ~5% of the
/// reserved space.
pub const HEAD_FOOTER_RESERVED_SIZE: u64 = 4 << 10;
/// Footer payload flag (bit 0): the writer populated the head mirror
/// at file offset 0.
///
/// Readers skip the head-fallback path on tail-verify failure when
/// this bit is clear.
pub const FLAG_FOOTER_MIRROR_ENABLED: u8 = 0b0000_0001;
/// Upper bound (16 MiB) on the on-disk size of one manifest section
/// Block.
///
/// The cap exists so that a forged or corrupted TOC can never drive
/// the reader into allocating a multi-hundred-MiB buffer. Realistic
/// production manifests carry KB-scale sections (table list,
/// blob-file list, format metadata); the largest plausible section
/// is the `tables` block on a heavily-populated tree, and even with
/// thousands of tables it still sits comfortably under 16 MiB.
///
/// Bumped only when `manifest_layout_version` changes. Increasing
/// the cap is additive: older readers reject the bigger block as
/// oversized, newer readers accept it.
pub const MAX_MANIFEST_BLOCK_SIZE: u32 = 16 << 20;
/// Width in bytes of the footer-size pointer that ends every manifest
/// file.
///
/// The pointer is a little-endian `u32` holding the footer Block's
/// on-disk size. A reader starts with these last 4 bytes, then seeks
/// to `file_len - 4 - size` to land on the start of the footer Block.
/// Without this hint it would have to scan backwards through the file
/// looking for the footer's magic header.
pub const TAIL_FOOTER_SIZE_HINT_BYTES: u64 = 4;
/// Longest section name, in bytes, that a TOC entry may carry.
///
/// The limit applies to the UTF-8 bytes of the name stored in each
/// TOC entry. It is deliberately generous: every name the current
/// writer emits (`format_version`, `tree_type`, `level_count`,
/// `filter_hash_type`, `comparator_name`, `tables`, `blob_files`,
/// `blob_gc_stats`) fits with room to spare for additive growth.
/// Hitting this cap signals a programming error rather than a
/// legitimate need; bump in a layout-version-2 if real production
/// names ever approach it.
pub const MAX_SECTION_NAME_BYTES: usize = 64;
/// AAD table-id sentinel used for manifest Blocks: `u64::MAX`.
///
/// The manifest is not an SST and has no `TableId`; the sentinel
/// keeps the per-block AAD discriminator non-zero so cross-format
/// substitution between manifest and data Blocks fails AEAD verify.
// `u64::MAX` is the associated constant on the primitive type; the
// path separator is required for the name to resolve.
pub const MANIFEST_TABLE_ID_SENTINEL: u64 = u64::MAX;