sherlock_nsf_parser/superblock.rs
1//! Superblock parsing.
2//!
3//! The superblock is the second-tier metadata structure in an NSF database.
4//! [`crate::info2::Information2`] holds the file positions of **four**
5//! superblock copies: Domino writes all four on every commit so a crash
6//! leaves at least one valid. The freshest by `modification_time` is the
7//! authoritative copy; the other three are write-ahead-log redundancy.
8//!
9//! The freshest superblock is the entry point for Bucket Descriptor Table
10//! (BDT) walking - it carries the page-count fields plus the data RRV
11//! bucket position used for modern-ODS note enumeration. The `bdt_position`
12//! field in DBINFO is always zero on modern ODS; only the superblock
13//! resolves bucket_index -> file_offset.
14//!
15//! Header layout (100 bytes, LE throughout) per
16//! `libnsfdb/nsfdb_superblock.h::nsfdb_superblock_header`:
17//!
18//! ```text
19//! offset width field
20//! 0 2 signature (0x0E 0x00)
21//! 2 8 modification_time (TIMEDATE)
22//! 10 4 uncompressed_size
23//! 14 4 number_of_summary_buckets
24//! 18 4 number_of_non_summary_buckets
25//! 22 4 number_of_bitmaps
26//! 26 4 rrv_bucket_size
27//! 30 4 data_rrv_bucket_position (256-byte units)
28//! 34 4 rrv_identifier_low
29//! 38 4 rrv_identifier_high
30//! 42 4 bitmap_size
31//! 46 4 data_note_identifier_table_size
32//! 50 4 modified_note_log_size
33//! 54 4 folder_directory_object_size
34//! 58 2 flags
35//! 60 4 write_count
36//! 64 4 size
37//! 68 2 compression_type
38//! 70 4 number_of_summary_bucket_descriptor_pages
39//! 74 4 number_of_non_summary_bucket_descriptor_pages
40//! 78 4 number_of_soft_deleted_note_entries
41//! 82 2 shared_template_information_size
42//! 84 2 unknown1
43//! 86 2 number_of_form_names
44//! 88 4 form_bitmap_size
45//! 92 8 unknown2
46//! ```
47//!
48//! Footer layout (12 bytes) per
49//! `libnsfdb/nsfdb_superblock.h::nsfdb_superblock_footer`:
50//!
51//! ```text
52//! offset width field
53//! 0 8 modification_time (TIMEDATE)
54//! 8 4 checksum (XOR-32 of superblock body)
55//! ```
56//!
57//! Note: the NSF_HANDOFF.md document lists the header as 110 bytes; the
58//! authoritative `nsfdb_superblock.h` struct sums to **100 bytes** for
59//! the header plus 12 for the footer. This module trusts the struct.
60
61use crate::error::NsfError;
62use crate::time::Timedate;
63
/// Two-byte marker (`0x0E 0x00`) that opens every superblock header.
pub const SUPERBLOCK_SIGNATURE: [u8; 2] = [0x0E, 0x00];

/// Number of bytes occupied on disk by the superblock header.
pub const SUPERBLOCK_HEADER_BYTES: usize = 100;

/// Number of bytes occupied on disk by the superblock footer.
pub const SUPERBLOCK_FOOTER_BYTES: usize = 12;
70
71/// Parsed superblock header. Field naming mirrors the libnsfdb struct.
72#[derive(Debug, Clone, Copy, PartialEq, Eq)]
73pub struct Superblock {
74 /// Most recent modification time. Used to select the freshest of the
75 /// four superblock copies via [`select_freshest`].
76 pub modification_time: Timedate,
77 /// Uncompressed size of the superblock body (when compressed).
78 pub uncompressed_size: u32,
79 /// Total summary buckets allocated in the database.
80 pub number_of_summary_buckets: u32,
81 /// Total non-summary buckets allocated in the database.
82 pub number_of_non_summary_buckets: u32,
83 /// Total bitmaps allocated in the database.
84 pub number_of_bitmaps: u32,
85 /// Size in bytes of each RRV bucket. Should match DBINFO's
86 /// `rrv_bucket_size`; cross-validate at the database layer.
87 pub rrv_bucket_size: u32,
88 /// Data RRV bucket position in 256-byte units. Multiply by 256 for
89 /// the byte offset. Should match DBINFO's `data_rrv_bucket_position`
90 /// for non-fresh templates.
91 pub data_rrv_bucket_position: u32,
92 /// Lower 32 bits of the next-available RRV identifier counter.
93 pub rrv_identifier_low: u32,
94 /// Upper 32 bits of the next-available RRV identifier counter.
95 pub rrv_identifier_high: u32,
96 /// Allocation-bitmap size in bytes.
97 pub bitmap_size: u32,
98 /// Data Note Identifier Table size in bytes.
99 pub data_note_identifier_table_size: u32,
100 /// Modified-note-log size in bytes.
101 pub modified_note_log_size: u32,
102 /// Folder Directory Object (FDO) size in bytes.
103 pub folder_directory_object_size: u32,
104 /// Flags word.
105 pub flags: u16,
106 /// Write-count counter. Increments on each superblock commit.
107 pub write_count: u32,
108 /// Total size in bytes of this superblock (header + body + footer).
109 pub size: u32,
110 /// Compression type of the superblock body. Typically zero
111 /// (uncompressed) on modern Domino.
112 pub compression_type: u16,
113 /// Number of summary-bucket-descriptor pages reachable via this
114 /// superblock. Used by Slice 2.6 Phase B for BDT walking.
115 pub number_of_summary_bucket_descriptor_pages: u32,
116 /// Number of non-summary-bucket-descriptor pages reachable via this
117 /// superblock.
118 pub number_of_non_summary_bucket_descriptor_pages: u32,
119 /// Number of soft-deleted note entries (notes moved to $Trash with
120 /// retention period still active).
121 pub number_of_soft_deleted_note_entries: u32,
122 /// Shared-template-information block size in bytes.
123 pub shared_template_information_size: u16,
124 /// Number of form names cached at the superblock level.
125 pub number_of_form_names: u16,
126 /// Form-bitmap size in bytes.
127 pub form_bitmap_size: u32,
128}
129
130impl Superblock {
131 /// Parse a superblock header from the first
132 /// [`SUPERBLOCK_HEADER_BYTES`] of a buffer positioned at the
133 /// superblock's file offset.
134 pub fn parse(bytes: &[u8]) -> Result<Self, NsfError> {
135 if bytes.len() < SUPERBLOCK_HEADER_BYTES {
136 return Err(NsfError::TooShort {
137 actual: bytes.len(),
138 required: SUPERBLOCK_HEADER_BYTES,
139 });
140 }
141 if bytes[0] != SUPERBLOCK_SIGNATURE[0] || bytes[1] != SUPERBLOCK_SIGNATURE[1] {
142 return Err(NsfError::BadSubrecordSignature {
143 kind: "superblock",
144 expected: SUPERBLOCK_SIGNATURE,
145 observed: [bytes[0], bytes[1]],
146 });
147 }
148
149 let u16_at = |o: usize| u16::from_le_bytes([bytes[o], bytes[o + 1]]);
150 let u32_at = |o: usize| {
151 u32::from_le_bytes([bytes[o], bytes[o + 1], bytes[o + 2], bytes[o + 3]])
152 };
153
154 Ok(Self {
155 modification_time: Timedate::from_bytes(&bytes[2..10])?,
156 uncompressed_size: u32_at(10),
157 number_of_summary_buckets: u32_at(14),
158 number_of_non_summary_buckets: u32_at(18),
159 number_of_bitmaps: u32_at(22),
160 rrv_bucket_size: u32_at(26),
161 data_rrv_bucket_position: u32_at(30),
162 rrv_identifier_low: u32_at(34),
163 rrv_identifier_high: u32_at(38),
164 bitmap_size: u32_at(42),
165 data_note_identifier_table_size: u32_at(46),
166 modified_note_log_size: u32_at(50),
167 folder_directory_object_size: u32_at(54),
168 flags: u16_at(58),
169 write_count: u32_at(60),
170 size: u32_at(64),
171 compression_type: u16_at(68),
172 number_of_summary_bucket_descriptor_pages: u32_at(70),
173 number_of_non_summary_bucket_descriptor_pages: u32_at(74),
174 number_of_soft_deleted_note_entries: u32_at(78),
175 shared_template_information_size: u16_at(82),
176 number_of_form_names: u16_at(86),
177 form_bitmap_size: u32_at(88),
178 })
179 }
180
181 /// Comparable absolute UTC timestamp from this superblock's
182 /// `modification_time`. Returned as `(julian_day, centiseconds)`
183 /// which compares lexicographically and treats different timezones
184 /// uniformly (Innards[0] is always centiseconds since midnight UTC
185 /// per the format spec).
186 pub fn modification_sort_key(&self) -> (u32, u32) {
187 let julian = self.modification_time.innards1 & 0x00FF_FFFF;
188 (julian, self.modification_time.innards0)
189 }
190}
191
192/// Pick the freshest superblock from a slice of parsed superblocks. Each
193/// element is paired with its slot index 0..=3 so the caller can report
194/// which copy was selected. Returns `None` if the input is empty.
195///
196/// Comparison uses `(julian_day, centiseconds_since_midnight_utc)` from
197/// the superblock's `modification_time` - both UTC, both monotonic across
198/// commits.
199pub fn select_freshest(superblocks: &[(usize, Superblock)]) -> Option<(usize, Superblock)> {
200 superblocks
201 .iter()
202 .copied()
203 .max_by_key(|(_, sb)| sb.modification_sort_key())
204}
205
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a synthetic superblock header. `julian` sets the JDN portion
    /// of `modification_time`; `centi` sets centiseconds; the rest of the
    /// fields are arbitrary fixed values for round-trip testing.
    fn synthetic(julian: u32, centi: u32) -> Vec<u8> {
        let mut buf = vec![0u8; SUPERBLOCK_HEADER_BYTES];
        buf[0..2].copy_from_slice(&SUPERBLOCK_SIGNATURE);
        // Innards[0] = centiseconds (offset 2..6).
        buf[2..6].copy_from_slice(&centi.to_le_bytes());
        // Innards[1] low 24 bits = JDN (offset 6..10).
        buf[6..10].copy_from_slice(&julian.to_le_bytes());
        // rrv_bucket_size at offset 26, set to 0x1000.
        buf[26..30].copy_from_slice(&0x1000u32.to_le_bytes());
        // data_rrv_bucket_position at offset 30, set to 0x2af0.
        buf[30..34].copy_from_slice(&0x2af0u32.to_le_bytes());
        // size at offset 64, set to 4096.
        buf[64..68].copy_from_slice(&4096u32.to_le_bytes());
        // number_of_summary_bucket_descriptor_pages at offset 70.
        buf[70..74].copy_from_slice(&3u32.to_le_bytes());
        // number_of_non_summary_bucket_descriptor_pages at offset 74.
        buf[74..78].copy_from_slice(&5u32.to_le_bytes());
        buf
    }

    /// Fields written by `synthetic` must come back unchanged from `parse`.
    #[test]
    fn parses_synthetic_superblock() {
        let buf = synthetic(2_450_428, 0x006C_DCC0);
        let sb = Superblock::parse(&buf).unwrap();
        assert_eq!(sb.rrv_bucket_size, 0x1000);
        assert_eq!(sb.data_rrv_bucket_position, 0x2af0);
        assert_eq!(sb.size, 4096);
        assert_eq!(sb.number_of_summary_bucket_descriptor_pages, 3);
        assert_eq!(sb.number_of_non_summary_bucket_descriptor_pages, 5);
        assert_eq!(sb.modification_sort_key(), (2_450_428, 0x006C_DCC0));
    }

    /// A corrupted first signature byte must be rejected with a
    /// superblock-specific error.
    #[test]
    fn rejects_bad_signature() {
        let mut buf = synthetic(2_450_428, 0);
        buf[0] = 0xFF;
        let err = Superblock::parse(&buf).unwrap_err();
        assert!(matches!(
            err,
            NsfError::BadSubrecordSignature {
                kind: "superblock",
                ..
            }
        ));
        // Error message must identify which signature failed - not
        // confused with the file-header "1A 00" signature.
        let msg = err.to_string();
        assert!(msg.contains("superblock"), "got: {msg}");
        assert!(msg.contains("0E 00"), "got: {msg}");
    }

    /// One byte short of a full header must yield `TooShort`.
    #[test]
    fn rejects_short_buffer() {
        let buf = vec![0u8; SUPERBLOCK_HEADER_BYTES - 1];
        let err = Superblock::parse(&buf).unwrap_err();
        assert!(matches!(err, NsfError::TooShort { .. }));
    }

    #[test]
    fn select_freshest_picks_highest_julian_day() {
        let sb_old = Superblock::parse(&synthetic(2_450_000, 0)).unwrap();
        let sb_new = Superblock::parse(&synthetic(2_500_000, 0)).unwrap();
        let sb_mid = Superblock::parse(&synthetic(2_460_000, 0)).unwrap();
        let result = select_freshest(&[(0, sb_old), (1, sb_new), (2, sb_mid)]);
        assert_eq!(result.unwrap().0, 1);
        assert_eq!(result.unwrap().1.modification_sort_key().0, 2_500_000);
    }

    #[test]
    fn select_freshest_breaks_ties_by_centiseconds() {
        let sb_morning = Superblock::parse(&synthetic(2_500_000, 1_000_000)).unwrap();
        let sb_evening = Superblock::parse(&synthetic(2_500_000, 8_000_000)).unwrap();
        let sb_noon = Superblock::parse(&synthetic(2_500_000, 4_320_000)).unwrap();
        let result = select_freshest(&[(0, sb_morning), (1, sb_evening), (2, sb_noon)]);
        assert_eq!(result.unwrap().0, 1);
    }

    #[test]
    fn select_freshest_empty_returns_none() {
        let v: Vec<(usize, Superblock)> = vec![];
        assert!(select_freshest(&v).is_none());
    }

    #[test]
    fn select_freshest_single_returns_that_one() {
        let sb = Superblock::parse(&synthetic(2_450_428, 0)).unwrap();
        let result = select_freshest(&[(2, sb)]);
        assert_eq!(result.unwrap().0, 2);
    }
}
302}