Skip to main content

sherlock_nsf_parser/
header.rs

1//! Database header (DBINFO) parsing.
2//!
3//! Layout per the authoritative `nsfdb_database_header.h` from libyal/
4//! libnsfdb (LGPL-3.0-or-later; not vendored, fields re-declared here
5//! by name). All offsets are relative to the start of DBINFO, which
6//! itself starts at file offset 6 (immediately after the 6-byte file
7//! header).
8//!
9//! ```text
10//! offset  width  field
11//!     0      4   format_version (ODS)
12//!     4      8   database_identifier (TIMEDATE)
13//!    12      2   application_version
14//!    14      4   non_data_rrv_bucket_position
15//!    18      4   available_non_data_rrv_identifier
16//!    22      2   number_of_available_non_data_rrvs
17//!    24      4   activity_log_offset
18//!    28      8   bucket_modification_time (TIMEDATE)
19//!    36      2   database_class
20//!    38      2   database_flags
21//!    40      4   bucket_descriptor_block_size
22//!    44      4   bucket_descriptor_block_position (BDB)
23//!    48      2   bdt_size
24//!    50      4   bdt_position
25//!    54      2   bdt_bitmaps
26//!    56      4   data_rrv_bucket_position
27//!    60      4   first_data_rrv_identifier
28//!    64      4   available_data_rrv_identifier
29//!    68      2   number_of_available_data_rrvs
30//!    70      2   rrv_bucket_size
31//!    72      2   summary_bucket_size
32//!    74      2   bitmap_size
33//!    76      2   allocation_granularity
34//!    78      4   extention_granularity
35//!    82      4   file_size (in 256-byte units)
36//!    86..       (additional fields not yet consumed by this crate)
37//! ```
38//!
39//! All multi-byte integers are little-endian.
40//!
41//! Empirical notes from the 17-sample corpus:
42//!
43//! - `bucket_descriptor_block_position` can legitimately be zero on
44//!   fresh templates that have not yet been instantiated. The
45//!   `data_rrv_bucket_position` is the more reliable "where data
46//!   actually lives" pointer; use it to seed RRV walking.
//! - Database flag bit 0x0040 is NOT the encryption flag despite
//!   operator-forum lore. Every file in the corpus (templates and
//!   real .nsfs alike) has that bit set, and none are encrypted. The
//!   authoritative bit position for "Local Database Encryption" lives
//!   in HCL's `dbopts.h`, which is not yet imported. Encryption
//!   detection is deferred to a later slice: the `flags` module does
//!   not define an encryption constant yet, and
//!   `is_database_encrypted` returns a documented "unknown" (`None`)
//!   via `Option<bool>`.
56
57use crate::detect::{identify_file_strict, FileKind};
58use crate::error::NsfError;
59use crate::ods::Ods;
60use crate::time::Timedate;
61
/// File offset at which DBINFO begins: immediately after the 6-byte
/// file header.
const DBINFO_START: usize = 6;
/// Number of DBINFO bytes that must be present before any field is
/// decoded. Every field this module reads ends at or before DBINFO
/// offset 86, well inside this window.
const DBINFO_CORE_MIN: usize = 128;
64
/// Bit masks for DBINFO's `database_flags` word (a u16 at DBINFO
/// offset 38). Only bits whose meaning has been checked against the
/// 17-sample corpus are defined here. Any entry added later and marked
/// `tentative` is still uncertain and must not drive a feature.
pub mod flags {
    /// The database is a template (.ntf semantics) rather than a
    /// regular database (.nsf). Checked empirically against the corpus
    /// (8 templates + 5 locale templates + 4 real .nsf files): the bit
    /// is set on every .ntf and clear on every .nsf.
    pub const DBFLAG_TEMPLATE: u16 = 1 << 4;
}
76
77/// Parsed database header. Self-contained snapshot of DBINFO - the
78/// reader does not retain a reference into the file bytes.
79#[derive(Debug, Clone, Copy, PartialEq, Eq)]
80pub struct DbHeader {
81    /// db_header_size from the outermost 6-byte file header.
82    pub db_header_size: u32,
83    /// ODS version (DBINFO offset 0).
84    pub ods: Ods,
85    /// Database identifier (DBINFO offset 4). 8-byte TIMEDATE used as
86    /// an opaque identifier.
87    pub database_id: Timedate,
88    /// Application-defined version (DBINFO offset 12). Free-form u16
89    /// for the form designer's use.
90    pub app_version: u16,
91    /// File offset of the bucket holding non-data RRVs (DBINFO offset
92    /// 14). Design notes, ACL notes, replication info, etc.
93    pub non_data_rrv_bucket_position: u32,
94    /// (next) available non-data RRV identifier (DBINFO offset 18).
95    pub available_non_data_rrv_identifier: u32,
96    /// Number of available non-data RRVs (DBINFO offset 22).
97    pub number_of_available_non_data_rrvs: u16,
98    /// Activity log offset (DBINFO offset 24).
99    pub activity_log_offset: u32,
100    /// Most recent bucket modification time (DBINFO offset 28).
101    pub bucket_modification: Timedate,
102    /// Database class (DBINFO offset 36). 2-byte identifier of what
103    /// kind of database this is (mailbox / template / design / etc).
104    pub database_class: u16,
105    /// Database flags word (DBINFO offset 38). Use [`flags`]
106    /// constants to interpret; only [`flags::DBFLAG_TEMPLATE`] is
107    /// verified.
108    pub database_flags: u16,
109    /// Bucket Descriptor Block size (DBINFO offset 40).
110    pub bucket_descriptor_block_size: u32,
111    /// Bucket Descriptor Block position (DBINFO offset 44). Can be
112    /// zero on freshly-instantiated templates; use
113    /// [`Self::data_rrv_bucket_position`] for "where notes live"
114    /// rather than this.
115    pub bucket_descriptor_block_position: u32,
116    /// Bucket Descriptor Table size (DBINFO offset 48).
117    pub bdt_size: u16,
118    /// Bucket Descriptor Table position (DBINFO offset 50).
119    pub bdt_position: u32,
120    /// Bucket Descriptor Table bitmaps (DBINFO offset 54).
121    pub bdt_bitmaps: u16,
122    /// File offset of the bucket holding data RRVs (DBINFO offset 56).
123    /// THIS is the entry point for note enumeration. Non-zero on any
124    /// database that contains notes.
125    pub data_rrv_bucket_position: u32,
126    /// First data RRV identifier (DBINFO offset 60).
127    pub first_data_rrv_identifier: u32,
128    /// (next) available data RRV identifier (DBINFO offset 64).
129    pub available_data_rrv_identifier: u32,
130    /// Number of available data RRVs (DBINFO offset 68).
131    pub number_of_available_data_rrvs: u16,
132    /// Size of each RRV bucket in bytes (DBINFO offset 70).
133    pub rrv_bucket_size: u16,
134    /// Size of each summary bucket in bytes (DBINFO offset 72).
135    pub summary_bucket_size: u16,
136    /// Bitmap allocation map size (DBINFO offset 74).
137    pub bitmap_size: u16,
138    /// Allocation granularity (DBINFO offset 76).
139    pub allocation_granularity: u16,
140    /// Extention granularity (DBINFO offset 78). (Spelling matches the
141    /// libnsfdb header which inherited the typo from the Notes C API.)
142    pub extention_granularity: u32,
143    /// File size in 256-byte units (DBINFO offset 82). Multiply by 256
144    /// to get the bytes the database knows about; may diverge from the
145    /// OS-reported file size if the file was truncated since the
146    /// header was last rewritten.
147    pub file_size_pages: u32,
148}
149
150impl DbHeader {
151    /// Parse the file header + DBINFO core from a byte slice containing
152    /// at least the first 6 + 128 = 134 bytes of the file.
153    pub fn parse(bytes: &[u8]) -> Result<Self, NsfError> {
154        let file_kind = identify_file_strict(bytes)?;
155        let db_header_size = match file_kind {
156            FileKind::Nsf { db_header_size } => db_header_size,
157            FileKind::NotNsf { reason } => {
158                let _ = reason;
159                return Err(NsfError::BadFileSignature { observed: [0, 0] });
160            }
161        };
162
163        let required = DBINFO_START + DBINFO_CORE_MIN;
164        if bytes.len() < required {
165            return Err(NsfError::TooShort {
166                actual: bytes.len(),
167                required,
168            });
169        }
170
171        let d = &bytes[DBINFO_START..DBINFO_START + DBINFO_CORE_MIN];
172
173        // Helper closures: little-endian readers at the given DBINFO
174        // offset. Keeps the field-extraction lines visually aligned with
175        // the struct definition above and lets the optimizer fold the
176        // bounds checks (we asserted DBINFO_CORE_MIN above).
177        let u16_at = |o: usize| u16::from_le_bytes([d[o], d[o + 1]]);
178        let u32_at = |o: usize| u32::from_le_bytes([d[o], d[o + 1], d[o + 2], d[o + 3]]);
179
180        let ods_raw = u32_at(0);
181        let database_id = Timedate::from_bytes(&d[4..12])?;
182        let app_version = u16_at(12);
183        let non_data_rrv_bucket_position = u32_at(14);
184        let available_non_data_rrv_identifier = u32_at(18);
185        let number_of_available_non_data_rrvs = u16_at(22);
186        let activity_log_offset = u32_at(24);
187        let bucket_modification = Timedate::from_bytes(&d[28..36])?;
188        let database_class = u16_at(36);
189        let database_flags = u16_at(38);
190        let bucket_descriptor_block_size = u32_at(40);
191        let bucket_descriptor_block_position = u32_at(44);
192        let bdt_size = u16_at(48);
193        let bdt_position = u32_at(50);
194        let bdt_bitmaps = u16_at(54);
195        let data_rrv_bucket_position = u32_at(56);
196        let first_data_rrv_identifier = u32_at(60);
197        let available_data_rrv_identifier = u32_at(64);
198        let number_of_available_data_rrvs = u16_at(68);
199        let rrv_bucket_size = u16_at(70);
200        let summary_bucket_size = u16_at(72);
201        let bitmap_size = u16_at(74);
202        let allocation_granularity = u16_at(76);
203        let extention_granularity = u32_at(78);
204        let file_size_pages = u32_at(82);
205
206        Ok(Self {
207            db_header_size,
208            ods: Ods::new(ods_raw),
209            database_id,
210            app_version,
211            non_data_rrv_bucket_position,
212            available_non_data_rrv_identifier,
213            number_of_available_non_data_rrvs,
214            activity_log_offset,
215            bucket_modification,
216            database_class,
217            database_flags,
218            bucket_descriptor_block_size,
219            bucket_descriptor_block_position,
220            bdt_size,
221            bdt_position,
222            bdt_bitmaps,
223            data_rrv_bucket_position,
224            first_data_rrv_identifier,
225            available_data_rrv_identifier,
226            number_of_available_data_rrvs,
227            rrv_bucket_size,
228            summary_bucket_size,
229            bitmap_size,
230            allocation_granularity,
231            extention_granularity,
232            file_size_pages,
233        })
234    }
235
236    /// True if the database is flagged as a template (.ntf semantics).
237    /// Verified empirically against the corpus: set on every .ntf,
238    /// clear on every .nsf.
239    pub fn is_template(&self) -> bool {
240        self.database_flags & flags::DBFLAG_TEMPLATE != 0
241    }
242
243    /// Encryption detection: NOT IMPLEMENTED in v0.1.
244    ///
245    /// The libnsfdb spec leaves the encryption-flag bit position as
246    /// TODO. The widely-cited 0x0040 value does NOT match the corpus
247    /// (every sample has that bit set; none are encrypted). The
248    /// authoritative bit lives in HCL's `dbopts.h` which we have not
249    /// yet imported.
250    ///
251    /// Returns `None` until detection is reliable. The viewer surfaces
252    /// this as "encryption detection deferred" rather than reporting
253    /// false negatives.
254    pub fn is_database_encrypted(&self) -> Option<bool> {
255        None
256    }
257
258    /// Convenience: file-size estimate from the header's
259    /// 256-byte-increment field. Multiply by 256.
260    pub fn file_size_from_header_bytes(&self) -> u64 {
261        (self.file_size_pages as u64) * 256
262    }
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// Assemble a 256-byte, otherwise zero-filled file image that is
    /// just valid enough to parse. Only the fields the tests assert on
    /// are populated, each with a distinctive non-zero value (apart
    /// from `db_flags`, which the caller chooses).
    fn synthetic_header(ods: u32, db_flags: u16) -> Vec<u8> {
        let mut image = vec![0u8; 256];
        // Writes `le` into the image starting at file offset `at`.
        let mut put = |at: usize, le: &[u8]| image[at..at + le.len()].copy_from_slice(le);

        // File header: LSIG followed by db_header_size = 1024.
        put(0, &[0x1A, 0x00]);
        put(2, &1024u32.to_le_bytes());
        // DBINFO starts at file offset 6; file offset = DBINFO offset + 6.
        // ODS, DBINFO offset 0.
        put(6, &ods.to_le_bytes());
        // database_flags, DBINFO offset 38.
        put(44, &db_flags.to_le_bytes());
        // bucket_descriptor_block_position, DBINFO offset 44.
        put(50, &0x0000_4000u32.to_le_bytes());
        // data_rrv_bucket_position, DBINFO offset 56.
        put(62, &0x0000_2af0u32.to_le_bytes());
        // file_size, DBINFO offset 82.
        put(88, &5000u32.to_le_bytes());

        image
    }

    #[test]
    fn parses_synthetic_ods_53_unencrypted() {
        let header = DbHeader::parse(&synthetic_header(53, 0)).unwrap();

        assert_eq!(header.db_header_size, 1024);
        assert_eq!(header.ods.raw, 53);
        assert!(!header.is_template());
        assert!(header.is_database_encrypted().is_none(), "encryption detection deferred");
        assert_eq!(header.bucket_descriptor_block_position, 0x0000_4000);
        assert_eq!(header.data_rrv_bucket_position, 0x0000_2af0);
        assert_eq!(header.file_size_pages, 5000);
        assert_eq!(header.file_size_from_header_bytes(), 5000 * 256);
    }

    #[test]
    fn flags_template_decodes_correctly() {
        let image = synthetic_header(53, flags::DBFLAG_TEMPLATE);
        assert!(DbHeader::parse(&image).unwrap().is_template());
    }

    #[test]
    fn rejects_bad_magic() {
        let mut image = synthetic_header(53, 0);
        // Overwrite the two signature bytes with garbage.
        image[..2].copy_from_slice(&[0xDE, 0xAD]);

        let failure = DbHeader::parse(&image).unwrap_err();
        assert!(matches!(failure, NsfError::BadFileSignature { .. }));
    }

    #[test]
    fn rejects_too_short_for_dbinfo() {
        // A valid 6-byte file header with no DBINFO bytes behind it.
        let image = [0x1Au8, 0x00, 0x00, 0x04, 0x00, 0x00].to_vec();

        let failure = DbHeader::parse(&image).unwrap_err();
        assert!(matches!(failure, NsfError::TooShort { .. }));
    }

    #[test]
    fn ods_supported_check_works_via_header() {
        let modern = DbHeader::parse(&synthetic_header(53, 0)).unwrap();
        assert!(modern.ods.is_supported_for_enumeration());

        let legacy = DbHeader::parse(&synthetic_header(17, 0)).unwrap();
        assert!(!legacy.ods.is_supported_for_enumeration());
    }

    #[test]
    fn parses_canonical_comparedbs_ntf_header_bytes() {
        // The first 96 bytes of comparedbs.ntf from the corpus, as
        // dumped by xxd from the real file. They are pinned here so
        // that a regression in field-offset arithmetic shows up
        // immediately.
        #[rustfmt::skip]
        const COMPAREDBS_PREFIX: [u8; 96] = [
            0x1a, 0x00, 0x00, 0x04, 0x00, 0x00, 0x34, 0x00,
            0x00, 0x00, 0xa9, 0xf4, 0x61, 0x00, 0x0c, 0x88,
            0x25, 0x85, 0x00, 0x00, 0xe0, 0x03, 0x00, 0x00,
            0xf6, 0x03, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00,
            0x00, 0x00, 0x3f, 0x08, 0x62, 0x00, 0x0c, 0x88,
            0x25, 0x00, 0x04, 0xff, 0x50, 0x42, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0xf0, 0x2a,
            0x00, 0x00, 0xf6, 0x08, 0x00, 0x00, 0x5a, 0x09,
            0x00, 0x00, 0xe3, 0x01, 0x00, 0x10, 0x00, 0x20,
            0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00,
            0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        ];

        // Zero-pad to 256 bytes, comfortably past the 134 bytes that
        // `parse` requires; the padding is never interpreted.
        let mut image = COMPAREDBS_PREFIX.to_vec();
        image.resize(256, 0);
        let header = DbHeader::parse(&image).unwrap();

        // ODS 52 = Notes 9.0.1.
        assert_eq!(header.ods.raw, 52);
        // A .ntf, so the template flag must be set.
        assert!(header.is_template(), "comparedbs.ntf flags = 0x{:04X}", header.database_flags);
        // The BDB position really is zero on this template; the data
        // RRV bucket sits at 0x2af0.
        assert_eq!(header.bucket_descriptor_block_position, 0);
        assert_eq!(header.data_rrv_bucket_position, 0x2af0);
        // 0x3000 pages * 256 = 0x300000 bytes = 3 MB (the real file is
        // 3.1 MB).
        assert_eq!(header.file_size_pages, 0x3000);
        // RRV bucket size 0x1000 = 4 KB, the modern Domino default.
        assert_eq!(header.rrv_bucket_size, 0x1000);
    }
}