1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
// SPDX-License-Identifier: Apache-2.0
// Copyright (c) 2024-present, fjall-rs
// Copyright (c) 2026-present, Structured World Foundation
use crate::{
FormatVersion, TreeType, checksum::ChecksumType, manifest_blocks::reader::ManifestArchiveReader,
};
#[cfg(not(feature = "std"))]
use alloc::{
borrow::ToOwned,
string::{String, ToString},
};
/// Metadata decoded from the sections of an on-disk manifest archive.
///
/// Values are populated by [`Manifest::decode_from`]; see that function
/// for the validation applied to each section.
pub struct Manifest {
/// Format version, parsed from the first byte of the `format_version`
/// section.
pub version: FormatVersion,
/// Tree type, parsed from the first byte of the `tree_type` section.
#[cfg_attr(
not(test),
expect(
dead_code,
reason = "deserialized from on-disk manifest, retained for validation; read in tests"
)
)]
pub tree_type: TreeType,
/// Level count, taken from the first byte of the `level_count` section.
/// Decoding rejects every value other than 7.
pub level_count: u8,
/// Comparator name from the optional `comparator_name` section;
/// `"default"` when the section is absent.
pub comparator_name: String,
}
impl Manifest {
/// Decode the `Manifest` metadata from a freshly-opened
/// [`ManifestArchiveReader`]. Reads the mandatory sections
/// (`format_version`, `tree_type`, `level_count`,
/// `filter_hash_type`) and the optional `comparator_name`.
///
/// The reader's per-section Block reads already cover XXH3 /
/// optional ECC / optional AEAD; this function only parses the
/// payload bytes.
///
/// # Errors
///
/// - [`crate::Error::ManifestSectionInvalid`] when a mandatory
/// section name is not present in the TOC (the per-section
/// error variant surfaced by
/// [`ManifestArchiveReader::read_section`])
/// - [`crate::Error::InvalidVersion`] when `format_version`
/// carries an unknown discriminant
/// - [`crate::Error::InvalidTag`] for unknown `TreeType` /
/// `ChecksumType` discriminants
/// - [`crate::Error::DecompressedSizeTooLarge`] when
/// `comparator_name` exceeds the configured length cap
/// - [`crate::Error::Utf8`] when `comparator_name` bytes are
/// not valid UTF-8
/// - [`crate::Error::InvalidHeader`] when a single-byte mandatory
/// section is empty / truncated (`format_version`,
/// `tree_type`, `level_count`, `filter_hash_type`)
/// - propagates Block I/O / verification errors from the
/// reader (including [`crate::Error::ManifestFooterInvalid`]
/// for footer-level corruption)
pub fn decode_from(reader: &mut ManifestArchiveReader) -> Result<Self, crate::Error> {
let format_version_bytes = reader.read_section("format_version")?;
let version = {
let v = format_version_bytes
.first()
.copied()
.ok_or(crate::Error::InvalidHeader("format_version"))?;
FormatVersion::try_from(v).map_err(|()| crate::Error::InvalidVersion(v))?
};
let tree_type_bytes = reader.read_section("tree_type")?;
let tree_type = {
let raw = tree_type_bytes
.first()
.copied()
.ok_or(crate::Error::InvalidHeader("tree_type"))?;
raw.try_into()
.map_err(|()| crate::Error::InvalidTag(("TreeType", raw)))?
};
let level_count_bytes = reader.read_section("level_count")?;
// Mirror format_version / tree_type above: a truncated /
// empty section is structural corruption, not generic I/O.
// `Cursor::read_u8` would surface Io(UnexpectedEof) which
// is harder to route at the caller; the InvalidHeader
// variant carries the section name for diagnostics and
// matches the sibling sections' classification.
let level_count = *level_count_bytes
.first()
.ok_or(crate::Error::InvalidHeader("level_count"))?;
// Currently level count is hard coded to 7. The byte comes
// from disk, so a corrupted / forged manifest could carry
// any value here — return InvalidHeader instead of panicking
// so the caller (Tree::open) gets a routable error rather
// than a process abort.
if level_count != 7 {
return Err(crate::Error::InvalidHeader("level_count"));
}
{
let filter_hash_type_bytes = reader.read_section("filter_hash_type")?;
// Only one supported right now (and probably forever).
// Same disk-sourced rationale as `level_count` above —
// surface mismatch as InvalidHeader, not assert.
if filter_hash_type_bytes.as_slice() != [u8::from(ChecksumType::Xxh3)] {
return Err(crate::Error::InvalidHeader("filter_hash_type"));
}
}
// Optional section — absent in manifests written before
// comparator identity persistence was added. The
// `UserComparator` trait was introduced in the same release
// cycle, so all pre-existing trees used
// `DefaultUserComparator` whose `name()` returns "default".
// Custom comparators cannot exist in old manifests.
let comparator_name = match reader.section("comparator_name") {
Some(_entry) => {
let bytes = reader.read_section("comparator_name")?;
let limit = crate::comparator::MAX_COMPARATOR_NAME_BYTES as u64;
if bytes.len() as u64 > limit {
return Err(crate::Error::DecompressedSizeTooLarge {
declared: bytes.len() as u64,
limit,
});
}
String::from_utf8(bytes).map_err(|e| crate::Error::Utf8(e.utf8_error()))?
}
None => "default".to_owned(),
};
Ok(Self {
version,
tree_type,
level_count,
comparator_name,
})
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::io::WriteBytesExt;
    use crate::{
        fs::{Fs, MemFs, StdFs},
        manifest_blocks::writer::ManifestArchiveWriter,
    };
    use std::{io::Write, path::Path};

    /// Default runtime configuration shared by the writer and reader
    /// helpers below.
    fn default_config() -> std::sync::Arc<crate::runtime_config::RuntimeConfig> {
        std::sync::Arc::new(crate::runtime_config::RuntimeConfig::default())
    }

    /// Write a minimal Blocks-based manifest containing all four
    /// mandatory sections, followed by a `comparator_name` section
    /// holding `comparator_bytes` verbatim when it is `Some`.
    ///
    /// Taking raw bytes lets tests store payloads that are not valid
    /// UTF-8; the writer does not validate them.
    fn write_manifest_with_raw_name(
        path: &Path,
        comparator_bytes: Option<&[u8]>,
        fs: &dyn Fs,
    ) -> crate::Result<()> {
        let mut archive = ManifestArchiveWriter::create(
            path,
            fs,
            default_config(),
            None,
            crate::fs::SyncMode::Normal,
        )?;

        archive.start("format_version")?;
        archive.write_u8(FormatVersion::V5.into())?;

        archive.start("tree_type")?;
        archive.write_u8(TreeType::Standard.into())?;

        archive.start("level_count")?;
        archive.write_u8(7)?;

        archive.start("filter_hash_type")?;
        archive.write_u8(u8::from(ChecksumType::Xxh3))?;

        if let Some(raw) = comparator_bytes {
            archive.start("comparator_name")?;
            archive.write_all(raw)?;
        }

        archive.finish()?;
        Ok(())
    }

    /// Write a minimal valid Blocks-based manifest with all four
    /// mandatory sections (and optionally a `comparator_name`).
    fn write_test_manifest(
        path: &Path,
        comparator_name: Option<&str>,
        fs: &dyn Fs,
    ) -> crate::Result<()> {
        write_manifest_with_raw_name(path, comparator_name.map(str::as_bytes), fs)
    }

    /// Open the manifest at `path` on `fs` and decode its metadata.
    fn decode_manifest(path: &Path, fs: &dyn Fs) -> crate::Result<Manifest> {
        let mut archive = ManifestArchiveReader::open(path, fs, default_config(), None)?;
        Manifest::decode_from(&mut archive)
    }

    // ------------------------------------------------------------------
    // StdFs tests
    // ------------------------------------------------------------------

    #[test]
    fn manifest_without_comparator_name_defaults_to_default() -> crate::Result<()> {
        let tmp = tempfile::tempdir()?;
        let manifest_path = tmp.path().join("manifest");

        write_test_manifest(&manifest_path, None, &StdFs)?;
        let decoded = decode_manifest(&manifest_path, &StdFs)?;

        assert_eq!(decoded.comparator_name, "default");
        Ok(())
    }

    #[test]
    fn manifest_with_comparator_name_round_trips() -> crate::Result<()> {
        let tmp = tempfile::tempdir()?;
        let manifest_path = tmp.path().join("manifest");

        write_test_manifest(&manifest_path, Some("u64-big-endian"), &StdFs)?;
        let decoded = decode_manifest(&manifest_path, &StdFs)?;

        assert_eq!(decoded.comparator_name, "u64-big-endian");
        Ok(())
    }

    #[test]
    fn manifest_rejects_oversized_comparator_name() -> crate::Result<()> {
        let tmp = tempfile::tempdir()?;
        let manifest_path = tmp.path().join("manifest");

        // A 300-byte name is expected to exceed the decoder's length cap.
        let oversized = "x".repeat(300);
        write_test_manifest(&manifest_path, Some(&oversized), &StdFs)?;

        let outcome = decode_manifest(&manifest_path, &StdFs);
        assert!(
            matches!(outcome, Err(crate::Error::DecompressedSizeTooLarge { .. })),
            "expected DecompressedSizeTooLarge"
        );
        Ok(())
    }

    #[test]
    fn manifest_rejects_invalid_utf8_comparator_name() -> crate::Result<()> {
        let tmp = tempfile::tempdir()?;
        let manifest_path = tmp.path().join("manifest");

        // The writer accepts arbitrary bytes for the comparator_name
        // section; it is the Manifest decoder that enforces UTF-8.
        let not_utf8: &[u8] = &[0xFF, 0xFE];
        write_manifest_with_raw_name(&manifest_path, Some(not_utf8), &StdFs)?;

        let outcome = decode_manifest(&manifest_path, &StdFs);
        assert!(
            matches!(outcome, Err(crate::Error::Utf8(_))),
            "expected Utf8 error"
        );
        Ok(())
    }

    // ------------------------------------------------------------------
    // MemFs tests — verify decode_from works with non-StdFs backends
    // ------------------------------------------------------------------

    #[test]
    fn manifest_memfs_default_comparator() -> crate::Result<()> {
        let mem = MemFs::new();
        let root = Path::new("/memfs");
        mem.create_dir_all(root)?;
        let manifest_path = root.join("manifest_default");

        write_test_manifest(&manifest_path, None, &mem)?;
        let decoded = decode_manifest(&manifest_path, &mem)?;

        assert_eq!(decoded.comparator_name, "default");
        assert_eq!(decoded.level_count, 7);
        assert!(matches!(decoded.version, FormatVersion::V5));
        assert!(matches!(decoded.tree_type, TreeType::Standard));
        Ok(())
    }

    #[test]
    fn manifest_memfs_custom_comparator_round_trips() -> crate::Result<()> {
        let mem = MemFs::new();
        let root = Path::new("/memfs");
        mem.create_dir_all(root)?;
        let manifest_path = root.join("manifest_custom");

        write_test_manifest(&manifest_path, Some("u64-big-endian"), &mem)?;
        let decoded = decode_manifest(&manifest_path, &mem)?;

        assert_eq!(decoded.comparator_name, "u64-big-endian");
        Ok(())
    }
}