sherlock_nsf_parser/header.rs
1//! Database header (DBINFO) parsing.
2//!
3//! Layout per the authoritative `nsfdb_database_header.h` from libyal/
4//! libnsfdb (LGPL-3.0-or-later; not vendored, fields re-declared here
5//! by name). All offsets are relative to the start of DBINFO, which
6//! itself starts at file offset 6 (immediately after the 6-byte file
7//! header).
8//!
9//! ```text
10//! offset width field
11//! 0 4 format_version (ODS)
12//! 4 8 database_identifier (TIMEDATE)
13//! 12 2 application_version
14//! 14 4 non_data_rrv_bucket_position
15//! 18 4 available_non_data_rrv_identifier
16//! 22 2 number_of_available_non_data_rrvs
17//! 24 4 activity_log_offset
18//! 28 8 bucket_modification_time (TIMEDATE)
19//! 36 2 database_class
20//! 38 2 database_flags
21//! 40 4 bucket_descriptor_block_size
22//! 44 4 bucket_descriptor_block_position (BDB)
23//! 48 2 bdt_size
24//! 50 4 bdt_position
25//! 54 2 bdt_bitmaps
26//! 56 4 data_rrv_bucket_position
27//! 60 4 first_data_rrv_identifier
28//! 64 4 available_data_rrv_identifier
29//! 68 2 number_of_available_data_rrvs
30//! 70 2 rrv_bucket_size
31//! 72 2 summary_bucket_size
32//! 74 2 bitmap_size
33//! 76 2 allocation_granularity
34//! 78 4 extention_granularity
35//! 82 4 file_size (in 256-byte units)
36//! 86.. (additional fields not yet consumed by this crate)
37//! ```
38//!
39//! All multi-byte integers are little-endian.
40//!
41//! Empirical notes from the 17-sample corpus:
42//!
43//! - `bucket_descriptor_block_position` can legitimately be zero on
44//! fresh templates that have not yet been instantiated. The
45//! `data_rrv_bucket_position` is the more reliable "where data
46//! actually lives" pointer; use it to seed RRV walking.
47//! - Database flag bit 0x0040 is NOT the encryption flag despite
48//! operator-forum lore. Every file in the corpus (templates and
49//! real .nsfs alike) has that bit set, and none are encrypted. The
50//! authoritative bit position for "Local Database Encryption" lives
51//! in HCL's `dbopts.h` which is not yet imported. Encryption
//!   detection is deferred to a later slice; until then the `flags`
//!   module deliberately defines no encryption bit (a
//!   `DBFLAG_LOCAL_PROTECTED` constant would only be a known-uncertain
//!   placeholder), and `is_database_encrypted` returns a documented
//!   "unknown" (`None`) via `Option<bool>`.
56
57use crate::detect::{identify_file_strict, FileKind};
58use crate::error::NsfError;
59use crate::ods::Ods;
60use crate::time::Timedate;
61
/// File offset at which DBINFO begins: immediately after the 6-byte
/// file header.
const DBINFO_START: usize = 6;
/// Minimum number of DBINFO bytes that `DbHeader::parse` requires to be
/// present before it reads any field. The fields decoded in this file
/// end at DBINFO offset 86, so this leaves headroom past the last
/// consumed field.
const DBINFO_CORE_MIN: usize = 128;
64
/// Flag bits in DBINFO's `database_flags` u16 at offset 38. Bit
/// interpretation here is what we have verified against the 17-sample
/// corpus; entries marked `tentative` are still uncertain and not yet
/// used to drive any feature.
///
/// Only the template bit is defined at present. No encryption bit is
/// declared here: bit 0x0040 is set on every corpus sample although
/// none of them is encrypted, so it is not treated as an encryption
/// flag.
pub mod flags {
    /// Database is a template (.ntf semantics) rather than a regular
    /// database (.nsf). Verified empirically against the 8-template +
    /// 5-locale-template + 4-real-nsf corpus: set on every .ntf in the
    /// corpus, clear on every .nsf.
    ///
    /// Tested by `DbHeader::is_template`.
    pub const DBFLAG_TEMPLATE: u16 = 0x0010;
}
76
/// Parsed database header. Self-contained snapshot of DBINFO - the
/// reader does not retain a reference into the file bytes.
///
/// Every field is a plain copy of the corresponding on-disk value
/// (all multi-byte integers are little-endian on disk). Offsets quoted
/// in the field docs are relative to the start of DBINFO, which itself
/// begins at file offset 6.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DbHeader {
    /// db_header_size from the outermost 6-byte file header.
    pub db_header_size: u32,
    /// ODS version (DBINFO offset 0).
    pub ods: Ods,
    /// Database identifier (DBINFO offset 4). 8-byte TIMEDATE used as
    /// an opaque identifier.
    pub database_id: Timedate,
    /// Application-defined version (DBINFO offset 12). Free-form u16
    /// for the form designer's use.
    pub app_version: u16,
    /// File offset of the bucket holding non-data RRVs (DBINFO offset
    /// 14). Design notes, ACL notes, replication info, etc.
    pub non_data_rrv_bucket_position: u32,
    /// (next) available non-data RRV identifier (DBINFO offset 18).
    pub available_non_data_rrv_identifier: u32,
    /// Number of available non-data RRVs (DBINFO offset 22).
    pub number_of_available_non_data_rrvs: u16,
    /// Activity log offset (DBINFO offset 24).
    pub activity_log_offset: u32,
    /// Most recent bucket modification time (DBINFO offset 28).
    /// 8-byte TIMEDATE.
    pub bucket_modification: Timedate,
    /// Database class (DBINFO offset 36). 2-byte identifier of what
    /// kind of database this is (mailbox / template / design / etc).
    pub database_class: u16,
    /// Database flags word (DBINFO offset 38). Use [`flags`]
    /// constants to interpret; only [`flags::DBFLAG_TEMPLATE`] is
    /// verified.
    pub database_flags: u16,
    /// Bucket Descriptor Block size (DBINFO offset 40).
    pub bucket_descriptor_block_size: u32,
    /// Bucket Descriptor Block position (DBINFO offset 44). Can
    /// legitimately be zero on fresh templates that have not yet been
    /// instantiated; use [`Self::data_rrv_bucket_position`] for "where
    /// notes live" rather than this.
    pub bucket_descriptor_block_position: u32,
    /// Bucket Descriptor Table size (DBINFO offset 48).
    pub bdt_size: u16,
    /// Bucket Descriptor Table position (DBINFO offset 50).
    pub bdt_position: u32,
    /// Bucket Descriptor Table bitmaps (DBINFO offset 54).
    pub bdt_bitmaps: u16,
    /// File offset of the bucket holding data RRVs (DBINFO offset 56).
    /// THIS is the entry point for note enumeration. Non-zero on any
    /// database that contains notes.
    pub data_rrv_bucket_position: u32,
    /// First data RRV identifier (DBINFO offset 60).
    pub first_data_rrv_identifier: u32,
    /// (next) available data RRV identifier (DBINFO offset 64).
    pub available_data_rrv_identifier: u32,
    /// Number of available data RRVs (DBINFO offset 68).
    pub number_of_available_data_rrvs: u16,
    /// Size of each RRV bucket in bytes (DBINFO offset 70).
    pub rrv_bucket_size: u16,
    /// Size of each summary bucket in bytes (DBINFO offset 72).
    pub summary_bucket_size: u16,
    /// Bitmap allocation map size (DBINFO offset 74).
    pub bitmap_size: u16,
    /// Allocation granularity (DBINFO offset 76).
    pub allocation_granularity: u16,
    /// Extention granularity (DBINFO offset 78). (Spelling matches the
    /// libnsfdb header which inherited the typo from the Notes C API.)
    pub extention_granularity: u32,
    /// File size in 256-byte units (DBINFO offset 82). Multiply by 256
    /// to get the bytes the database knows about (see
    /// [`Self::file_size_from_header_bytes`]); may diverge from the
    /// OS-reported file size if the file was truncated since the
    /// header was last rewritten.
    pub file_size_pages: u32,
}
149
150impl DbHeader {
151 /// Parse the file header + DBINFO core from a byte slice containing
152 /// at least the first 6 + 128 = 134 bytes of the file.
153 pub fn parse(bytes: &[u8]) -> Result<Self, NsfError> {
154 let file_kind = identify_file_strict(bytes)?;
155 let db_header_size = match file_kind {
156 FileKind::Nsf { db_header_size } => db_header_size,
157 FileKind::NotNsf { reason } => {
158 let _ = reason;
159 return Err(NsfError::BadFileSignature { observed: [0, 0] });
160 }
161 };
162
163 let required = DBINFO_START + DBINFO_CORE_MIN;
164 if bytes.len() < required {
165 return Err(NsfError::TooShort {
166 actual: bytes.len(),
167 required,
168 });
169 }
170
171 let d = &bytes[DBINFO_START..DBINFO_START + DBINFO_CORE_MIN];
172
173 // Helper closures: little-endian readers at the given DBINFO
174 // offset. Keeps the field-extraction lines visually aligned with
175 // the struct definition above and lets the optimizer fold the
176 // bounds checks (we asserted DBINFO_CORE_MIN above).
177 let u16_at = |o: usize| u16::from_le_bytes([d[o], d[o + 1]]);
178 let u32_at = |o: usize| u32::from_le_bytes([d[o], d[o + 1], d[o + 2], d[o + 3]]);
179
180 let ods_raw = u32_at(0);
181 let database_id = Timedate::from_bytes(&d[4..12])?;
182 let app_version = u16_at(12);
183 let non_data_rrv_bucket_position = u32_at(14);
184 let available_non_data_rrv_identifier = u32_at(18);
185 let number_of_available_non_data_rrvs = u16_at(22);
186 let activity_log_offset = u32_at(24);
187 let bucket_modification = Timedate::from_bytes(&d[28..36])?;
188 let database_class = u16_at(36);
189 let database_flags = u16_at(38);
190 let bucket_descriptor_block_size = u32_at(40);
191 let bucket_descriptor_block_position = u32_at(44);
192 let bdt_size = u16_at(48);
193 let bdt_position = u32_at(50);
194 let bdt_bitmaps = u16_at(54);
195 let data_rrv_bucket_position = u32_at(56);
196 let first_data_rrv_identifier = u32_at(60);
197 let available_data_rrv_identifier = u32_at(64);
198 let number_of_available_data_rrvs = u16_at(68);
199 let rrv_bucket_size = u16_at(70);
200 let summary_bucket_size = u16_at(72);
201 let bitmap_size = u16_at(74);
202 let allocation_granularity = u16_at(76);
203 let extention_granularity = u32_at(78);
204 let file_size_pages = u32_at(82);
205
206 Ok(Self {
207 db_header_size,
208 ods: Ods::new(ods_raw),
209 database_id,
210 app_version,
211 non_data_rrv_bucket_position,
212 available_non_data_rrv_identifier,
213 number_of_available_non_data_rrvs,
214 activity_log_offset,
215 bucket_modification,
216 database_class,
217 database_flags,
218 bucket_descriptor_block_size,
219 bucket_descriptor_block_position,
220 bdt_size,
221 bdt_position,
222 bdt_bitmaps,
223 data_rrv_bucket_position,
224 first_data_rrv_identifier,
225 available_data_rrv_identifier,
226 number_of_available_data_rrvs,
227 rrv_bucket_size,
228 summary_bucket_size,
229 bitmap_size,
230 allocation_granularity,
231 extention_granularity,
232 file_size_pages,
233 })
234 }
235
236 /// True if the database is flagged as a template (.ntf semantics).
237 /// Verified empirically against the corpus: set on every .ntf,
238 /// clear on every .nsf.
239 pub fn is_template(&self) -> bool {
240 self.database_flags & flags::DBFLAG_TEMPLATE != 0
241 }
242
243 /// Encryption detection: NOT IMPLEMENTED in v0.1.
244 ///
245 /// The libnsfdb spec leaves the encryption-flag bit position as
246 /// TODO. The widely-cited 0x0040 value does NOT match the corpus
247 /// (every sample has that bit set; none are encrypted). The
248 /// authoritative bit lives in HCL's `dbopts.h` which we have not
249 /// yet imported.
250 ///
251 /// Returns `None` until detection is reliable. The viewer surfaces
252 /// this as "encryption detection deferred" rather than reporting
253 /// false negatives.
254 pub fn is_database_encrypted(&self) -> Option<bool> {
255 None
256 }
257
258 /// Convenience: file-size estimate from the header's
259 /// 256-byte-increment field. Multiply by 256.
260 pub fn file_size_from_header_bytes(&self) -> u64 {
261 (self.file_size_pages as u64) * 256
262 }
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// Produce a 256-byte, minimal-but-valid file image for unit tests.
    /// Only the fields the tests inspect are populated, with values
    /// that cannot be mistaken for the zero fill around them.
    fn synthetic_header(ods: u32, flags: u16) -> Vec<u8> {
        let mut image = vec![0u8; 256];

        // File header: two signature bytes, then db_header_size = 1024.
        image[..2].copy_from_slice(&[0x1A, 0x00]);
        image[2..6].copy_from_slice(&1024u32.to_le_bytes());

        // DBINFO fields, addressed by their DBINFO-relative offsets.
        let dbinfo = &mut image[DBINFO_START..];
        // format_version (ODS).
        dbinfo[0..4].copy_from_slice(&ods.to_le_bytes());
        // database_flags.
        dbinfo[38..40].copy_from_slice(&flags.to_le_bytes());
        // bucket_descriptor_block_position.
        dbinfo[44..48].copy_from_slice(&0x0000_4000u32.to_le_bytes());
        // data_rrv_bucket_position.
        dbinfo[56..60].copy_from_slice(&0x0000_2af0u32.to_le_bytes());
        // file_size (256-byte units).
        dbinfo[82..86].copy_from_slice(&5000u32.to_le_bytes());

        image
    }

    #[test]
    fn parses_synthetic_ods_53_unencrypted() {
        let header = DbHeader::parse(&synthetic_header(53, 0)).unwrap();

        assert_eq!(header.db_header_size, 1024);
        assert_eq!(header.ods.raw, 53);
        assert!(!header.is_template());
        assert_eq!(
            header.is_database_encrypted(),
            None,
            "encryption detection deferred"
        );
        assert_eq!(header.bucket_descriptor_block_position, 0x0000_4000);
        assert_eq!(header.data_rrv_bucket_position, 0x0000_2af0);
        assert_eq!(header.file_size_pages, 5000);
        assert_eq!(header.file_size_from_header_bytes(), 5000 * 256);
    }

    #[test]
    fn flags_template_decodes_correctly() {
        let image = synthetic_header(53, flags::DBFLAG_TEMPLATE);
        let header = DbHeader::parse(&image).unwrap();
        assert!(header.is_template());
    }

    #[test]
    fn rejects_bad_magic() {
        let mut image = synthetic_header(53, 0);
        image[..2].copy_from_slice(&[0xDE, 0xAD]);

        let outcome = DbHeader::parse(&image);
        assert!(matches!(outcome, Err(NsfError::BadFileSignature { .. })));
    }

    #[test]
    fn rejects_too_short_for_dbinfo() {
        // A valid 6-byte file header and nothing after it.
        let image = [0x1A_u8, 0x00, 0x00, 0x04, 0x00, 0x00];

        let outcome = DbHeader::parse(&image);
        assert!(matches!(outcome, Err(NsfError::TooShort { .. })));
    }

    #[test]
    fn ods_supported_check_works_via_header() {
        let modern = DbHeader::parse(&synthetic_header(53, 0)).unwrap();
        assert!(modern.ods.is_supported_for_enumeration());

        let legacy = DbHeader::parse(&synthetic_header(17, 0)).unwrap();
        assert!(!legacy.ods.is_supported_for_enumeration());
    }

    #[test]
    fn parses_canonical_comparedbs_ntf_header_bytes() {
        // The leading 96 bytes of comparedbs.ntf from the corpus (xxd of
        // the real file), pinned so that any slip in field-offset
        // arithmetic shows up immediately.
        #[rustfmt::skip]
        let head: &[u8] = &[
            0x1a, 0x00, 0x00, 0x04, 0x00, 0x00, 0x34, 0x00,
            0x00, 0x00, 0xa9, 0xf4, 0x61, 0x00, 0x0c, 0x88,
            0x25, 0x85, 0x00, 0x00, 0xe0, 0x03, 0x00, 0x00,
            0xf6, 0x03, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00,
            0x00, 0x00, 0x3f, 0x08, 0x62, 0x00, 0x0c, 0x88,
            0x25, 0x00, 0x04, 0xff, 0x50, 0x42, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0xf0, 0x2a,
            0x00, 0x00, 0xf6, 0x08, 0x00, 0x00, 0x5a, 0x09,
            0x00, 0x00, 0xe3, 0x01, 0x00, 0x10, 0x00, 0x20,
            0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00,
            0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        ];

        // Zero-pad to 256 bytes so the image comfortably exceeds the
        // minimum length `parse` requires; the padding is never decoded.
        let mut image = vec![0u8; 256];
        image[..head.len()].copy_from_slice(head);

        let header = DbHeader::parse(&image).unwrap();

        // ODS 52 = Notes 9.0.1.
        assert_eq!(header.ods.raw, 52);
        // Template flag set (.ntf).
        assert!(
            header.is_template(),
            "comparedbs.ntf flags = 0x{:04X}",
            header.database_flags
        );
        // BDB is genuinely zero on this template; data RRV is at 0x2af0.
        assert_eq!(header.bucket_descriptor_block_position, 0);
        assert_eq!(header.data_rrv_bucket_position, 0x2af0);
        // File size 0x3000 pages = 0x300000 bytes = 3 MiB (the real file
        // is about 3.1 MB).
        assert_eq!(header.file_size_pages, 0x3000);
        // RRV bucket size = 0x1000 = 4 KB, the modern Domino default.
        assert_eq!(header.rrv_bucket_size, 0x1000);
    }
}