Skip to main content

sherlock_nsf_parser/
superblock.rs

1//! Superblock parsing.
2//!
3//! The superblock is the second-tier metadata structure in an NSF database.
4//! [`crate::info2::Information2`] holds the file positions of **four**
5//! superblock copies: Domino writes all four on every commit so a crash
6//! leaves at least one valid. The freshest by `modification_time` is the
7//! authoritative copy; the other three are write-ahead-log redundancy.
8//!
9//! The freshest superblock is the entry point for Bucket Descriptor Table
10//! (BDT) walking - it carries the page-count fields plus the data RRV
11//! bucket position used for modern-ODS note enumeration. The `bdt_position`
12//! field in DBINFO is always zero on modern ODS; only the superblock
13//! resolves bucket_index -> file_offset.
14//!
15//! Header layout (100 bytes, LE throughout) per
16//! `libnsfdb/nsfdb_superblock.h::nsfdb_superblock_header`:
17//!
18//! ```text
19//! offset  width  field
20//!     0      2   signature (0x0E 0x00)
21//!     2      8   modification_time (TIMEDATE)
22//!    10      4   uncompressed_size
23//!    14      4   number_of_summary_buckets
24//!    18      4   number_of_non_summary_buckets
25//!    22      4   number_of_bitmaps
26//!    26      4   rrv_bucket_size
27//!    30      4   data_rrv_bucket_position (256-byte units)
28//!    34      4   rrv_identifier_low
29//!    38      4   rrv_identifier_high
30//!    42      4   bitmap_size
31//!    46      4   data_note_identifier_table_size
32//!    50      4   modified_note_log_size
33//!    54      4   folder_directory_object_size
34//!    58      2   flags
35//!    60      4   write_count
36//!    64      4   size
37//!    68      2   compression_type
38//!    70      4   number_of_summary_bucket_descriptor_pages
39//!    74      4   number_of_non_summary_bucket_descriptor_pages
40//!    78      4   number_of_soft_deleted_note_entries
41//!    82      2   shared_template_information_size
42//!    84      2   unknown1
43//!    86      2   number_of_form_names
44//!    88      4   form_bitmap_size
45//!    92      8   unknown2
46//! ```
47//!
48//! Footer layout (12 bytes) per
49//! `libnsfdb/nsfdb_superblock.h::nsfdb_superblock_footer`:
50//!
51//! ```text
52//! offset  width  field
53//!     0      8   modification_time (TIMEDATE)
54//!     8      4   checksum (XOR-32 of superblock body)
55//! ```
56//!
57//! Note: the NSF_HANDOFF.md document lists the header as 110 bytes; the
58//! authoritative `nsfdb_superblock.h` struct sums to **100 bytes** for
59//! the header plus 12 for the footer. This module trusts the struct.
60
61use crate::error::NsfError;
62use crate::time::Timedate;
63
/// Two-byte magic found at offset 0 of every superblock header.
///
/// On disk this is the byte pair `0E 00`, i.e. the little-endian encoding
/// of the 16-bit value `0x000E`.
pub const SUPERBLOCK_SIGNATURE: [u8; 2] = 0x000E_u16.to_le_bytes();
/// Number of bytes occupied by the superblock header on disk.
pub const SUPERBLOCK_HEADER_BYTES: usize = 100;
/// Number of bytes occupied by the superblock footer on disk.
pub const SUPERBLOCK_FOOTER_BYTES: usize = 12;
70
71/// Parsed superblock header. Field naming mirrors the libnsfdb struct.
72#[derive(Debug, Clone, Copy, PartialEq, Eq)]
73pub struct Superblock {
74    /// Most recent modification time. Used to select the freshest of the
75    /// four superblock copies via [`select_freshest`].
76    pub modification_time: Timedate,
77    /// Uncompressed size of the superblock body (when compressed).
78    pub uncompressed_size: u32,
79    /// Total summary buckets allocated in the database.
80    pub number_of_summary_buckets: u32,
81    /// Total non-summary buckets allocated in the database.
82    pub number_of_non_summary_buckets: u32,
83    /// Total bitmaps allocated in the database.
84    pub number_of_bitmaps: u32,
85    /// Size in bytes of each RRV bucket. Should match DBINFO's
86    /// `rrv_bucket_size`; cross-validate at the database layer.
87    pub rrv_bucket_size: u32,
88    /// Data RRV bucket position in 256-byte units. Multiply by 256 for
89    /// the byte offset. Should match DBINFO's `data_rrv_bucket_position`
90    /// for non-fresh templates.
91    pub data_rrv_bucket_position: u32,
92    /// Lower 32 bits of the next-available RRV identifier counter.
93    pub rrv_identifier_low: u32,
94    /// Upper 32 bits of the next-available RRV identifier counter.
95    pub rrv_identifier_high: u32,
96    /// Allocation-bitmap size in bytes.
97    pub bitmap_size: u32,
98    /// Data Note Identifier Table size in bytes.
99    pub data_note_identifier_table_size: u32,
100    /// Modified-note-log size in bytes.
101    pub modified_note_log_size: u32,
102    /// Folder Directory Object (FDO) size in bytes.
103    pub folder_directory_object_size: u32,
104    /// Flags word.
105    pub flags: u16,
106    /// Write-count counter. Increments on each superblock commit.
107    pub write_count: u32,
108    /// Total size in bytes of this superblock (header + body + footer).
109    pub size: u32,
110    /// Compression type of the superblock body. Typically zero
111    /// (uncompressed) on modern Domino.
112    pub compression_type: u16,
113    /// Number of summary-bucket-descriptor pages reachable via this
114    /// superblock. Used by Slice 2.6 Phase B for BDT walking.
115    pub number_of_summary_bucket_descriptor_pages: u32,
116    /// Number of non-summary-bucket-descriptor pages reachable via this
117    /// superblock.
118    pub number_of_non_summary_bucket_descriptor_pages: u32,
119    /// Number of soft-deleted note entries (notes moved to $Trash with
120    /// retention period still active).
121    pub number_of_soft_deleted_note_entries: u32,
122    /// Shared-template-information block size in bytes.
123    pub shared_template_information_size: u16,
124    /// Number of form names cached at the superblock level.
125    pub number_of_form_names: u16,
126    /// Form-bitmap size in bytes.
127    pub form_bitmap_size: u32,
128}
129
130impl Superblock {
131    /// Parse a superblock header from the first
132    /// [`SUPERBLOCK_HEADER_BYTES`] of a buffer positioned at the
133    /// superblock's file offset.
134    pub fn parse(bytes: &[u8]) -> Result<Self, NsfError> {
135        if bytes.len() < SUPERBLOCK_HEADER_BYTES {
136            return Err(NsfError::TooShort {
137                actual: bytes.len(),
138                required: SUPERBLOCK_HEADER_BYTES,
139            });
140        }
141        if bytes[0] != SUPERBLOCK_SIGNATURE[0] || bytes[1] != SUPERBLOCK_SIGNATURE[1] {
142            return Err(NsfError::BadSubrecordSignature {
143                kind: "superblock",
144                expected: SUPERBLOCK_SIGNATURE,
145                observed: [bytes[0], bytes[1]],
146            });
147        }
148
149        let u16_at = |o: usize| u16::from_le_bytes([bytes[o], bytes[o + 1]]);
150        let u32_at = |o: usize| {
151            u32::from_le_bytes([bytes[o], bytes[o + 1], bytes[o + 2], bytes[o + 3]])
152        };
153
154        Ok(Self {
155            modification_time: Timedate::from_bytes(&bytes[2..10])?,
156            uncompressed_size: u32_at(10),
157            number_of_summary_buckets: u32_at(14),
158            number_of_non_summary_buckets: u32_at(18),
159            number_of_bitmaps: u32_at(22),
160            rrv_bucket_size: u32_at(26),
161            data_rrv_bucket_position: u32_at(30),
162            rrv_identifier_low: u32_at(34),
163            rrv_identifier_high: u32_at(38),
164            bitmap_size: u32_at(42),
165            data_note_identifier_table_size: u32_at(46),
166            modified_note_log_size: u32_at(50),
167            folder_directory_object_size: u32_at(54),
168            flags: u16_at(58),
169            write_count: u32_at(60),
170            size: u32_at(64),
171            compression_type: u16_at(68),
172            number_of_summary_bucket_descriptor_pages: u32_at(70),
173            number_of_non_summary_bucket_descriptor_pages: u32_at(74),
174            number_of_soft_deleted_note_entries: u32_at(78),
175            shared_template_information_size: u16_at(82),
176            number_of_form_names: u16_at(86),
177            form_bitmap_size: u32_at(88),
178        })
179    }
180
181    /// Comparable absolute UTC timestamp from this superblock's
182    /// `modification_time`. Returned as `(julian_day, centiseconds)`
183    /// which compares lexicographically and treats different timezones
184    /// uniformly (Innards[0] is always centiseconds since midnight UTC
185    /// per the format spec).
186    pub fn modification_sort_key(&self) -> (u32, u32) {
187        let julian = self.modification_time.innards1 & 0x00FF_FFFF;
188        (julian, self.modification_time.innards0)
189    }
190}
191
192/// Pick the freshest superblock from a slice of parsed superblocks. Each
193/// element is paired with its slot index 0..=3 so the caller can report
194/// which copy was selected. Returns `None` if the input is empty.
195///
196/// Comparison uses `(julian_day, centiseconds_since_midnight_utc)` from
197/// the superblock's `modification_time` - both UTC, both monotonic across
198/// commits.
199pub fn select_freshest(superblocks: &[(usize, Superblock)]) -> Option<(usize, Superblock)> {
200    superblocks
201        .iter()
202        .copied()
203        .max_by_key(|(_, sb)| sb.modification_sort_key())
204}
205
#[cfg(test)]
mod tests {
    use super::*;

    /// Assemble a header-sized buffer holding a valid signature, the given
    /// `modification_time` parts (`centi` into Innards[0], `julian` into
    /// Innards[1]) and a handful of fixed field values; every other byte
    /// is zero.
    fn synthetic(julian: u32, centi: u32) -> Vec<u8> {
        // (header offset, little-endian u32 value)
        let fields: [(usize, u32); 7] = [
            (2, centi),   // Innards[0]: centiseconds
            (6, julian),  // Innards[1]: JDN in the low 24 bits
            (26, 0x1000), // rrv_bucket_size
            (30, 0x2af0), // data_rrv_bucket_position
            (64, 4096),   // size
            (70, 3),      // number_of_summary_bucket_descriptor_pages
            (74, 5),      // number_of_non_summary_bucket_descriptor_pages
        ];

        let mut header = vec![0u8; SUPERBLOCK_HEADER_BYTES];
        header[..2].copy_from_slice(&SUPERBLOCK_SIGNATURE);
        for &(offset, value) in fields.iter() {
            header[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
        }
        header
    }

    /// Parse a synthetic header, panicking if the parser rejects it.
    fn parsed(julian: u32, centi: u32) -> Superblock {
        Superblock::parse(&synthetic(julian, centi)).unwrap()
    }

    #[test]
    fn parses_synthetic_superblock() {
        let superblock = parsed(2_450_428, 0x006C_DCC0);

        assert_eq!(superblock.rrv_bucket_size, 0x1000);
        assert_eq!(superblock.data_rrv_bucket_position, 0x2af0);
        assert_eq!(superblock.size, 4096);
        assert_eq!(superblock.number_of_summary_bucket_descriptor_pages, 3);
        assert_eq!(superblock.number_of_non_summary_bucket_descriptor_pages, 5);
        assert_eq!(
            superblock.modification_sort_key(),
            (2_450_428, 0x006C_DCC0)
        );
    }

    #[test]
    fn rejects_bad_signature() {
        let mut header = synthetic(2_450_428, 0);
        header[0] = 0xFF;

        let err = Superblock::parse(&header).unwrap_err();
        assert!(matches!(
            err,
            NsfError::BadSubrecordSignature {
                kind: "superblock",
                ..
            }
        ));

        // The rendered message has to name the superblock signature so it
        // is not mistaken for the file-header "1A 00" signature.
        let msg = err.to_string();
        assert!(msg.contains("superblock"), "got: {msg}");
        assert!(msg.contains("0E 00"), "got: {msg}");
    }

    #[test]
    fn rejects_short_buffer() {
        // One byte short of a full header.
        let truncated = vec![0u8; SUPERBLOCK_HEADER_BYTES - 1];
        let err = Superblock::parse(&truncated).unwrap_err();
        assert!(matches!(err, NsfError::TooShort { .. }));
    }

    #[test]
    fn select_freshest_picks_highest_julian_day() {
        let candidates = [
            (0, parsed(2_450_000, 0)),
            (1, parsed(2_500_000, 0)),
            (2, parsed(2_460_000, 0)),
        ];

        let (slot, chosen) = select_freshest(&candidates).unwrap();
        assert_eq!(slot, 1);
        assert_eq!(chosen.modification_sort_key().0, 2_500_000);
    }

    #[test]
    fn select_freshest_breaks_ties_by_centiseconds() {
        // Same Julian day throughout; only the time of day differs.
        let candidates = [
            (0, parsed(2_500_000, 1_000_000)),
            (1, parsed(2_500_000, 8_000_000)),
            (2, parsed(2_500_000, 4_320_000)),
        ];

        let (slot, _) = select_freshest(&candidates).unwrap();
        assert_eq!(slot, 1);
    }

    #[test]
    fn select_freshest_empty_returns_none() {
        let nothing: Vec<(usize, Superblock)> = Vec::new();
        assert!(select_freshest(&nothing).is_none());
    }

    #[test]
    fn select_freshest_single_returns_that_one() {
        let only = [(2, parsed(2_450_428, 0))];
        let (slot, _) = select_freshest(&only).unwrap();
        assert_eq!(slot, 2);
    }
}