Skip to main content

sherlock_nsf_parser/
bucket.rs

1//! Generic bucket (and bucket-shaped page) parsing.
2//!
3//! A bucket is the unit of allocation inside an NSF file. The DBINFO
4//! header reports the size of summary buckets and RRV buckets
5//! separately. Every bucket starts with a `nsfdb_bucket_header`
6//! (signature 0x02, 66 bytes including footer-pointer fields) and
7//! optionally ends with a `nsfdb_bucket_footer`. Note records and
8//! summary item data live inside buckets, indexed by slot.
9//!
10//! Layout per `libnsfdb/nsfdb_bucket.h`:
11//!
12//! ```text
13//! offset  width  field
14//!     0      1   signature (0x02)
15//!     1      1   header_size
16//!     2      4   unknown1
17//!     6      4   size
18//!    10      8   modification_time (TIMEDATE)
19//!    18     20   unknown2
20//!    38      2   unknown3
21//!    40      4   checksum
22//!    44      4   number_of_slots
23//!    48      2   unknown4
24//!    50      4   footer_size
25//!    54     12   unknown5
26//! ```
27
28use crate::error::NsfError;
29use crate::time::Timedate;
30
/// Signature byte that must appear at offset 0 of every bucket header.
pub const BUCKET_SIGNATURE: u8 = 0x02;
/// Number of bytes the fixed bucket header occupies on disk; a buffer
/// shorter than this cannot hold a complete header.
pub const BUCKET_HEADER_BYTES: usize = 66;
35
36/// Parsed bucket header. Field naming matches the libnsfdb struct.
37#[derive(Debug, Clone, Copy, PartialEq, Eq)]
38pub struct BucketHeader {
39    /// Header size as declared on disk (typically 0x42 = 66 for modern
40    /// ODS). Surfaced for diagnostics; consumers should use
41    /// [`BUCKET_HEADER_BYTES`] when offsetting into bucket data.
42    pub header_size: u8,
43    /// Total bucket size in bytes (including header + slots + footer).
44    pub size: u32,
45    /// Most recent modification time.
46    pub modification_time: Timedate,
47    /// XOR-32 checksum of the header.
48    pub checksum: u32,
49    /// Number of slots in this bucket. Each slot holds one allocated
50    /// record (a note record in a non-summary bucket; summary-item
51    /// data in a summary bucket).
52    pub number_of_slots: u32,
53    /// Footer size in bytes.
54    pub footer_size: u32,
55}
56
57impl BucketHeader {
58    /// Parse a bucket header from the first 66 bytes of bucket data.
59    /// Errors on signature mismatch or short input.
60    pub fn parse(bytes: &[u8]) -> Result<Self, NsfError> {
61        if bytes.len() < BUCKET_HEADER_BYTES {
62            return Err(NsfError::TooShort {
63                actual: bytes.len(),
64                required: BUCKET_HEADER_BYTES,
65            });
66        }
67        if bytes[0] != BUCKET_SIGNATURE {
68            return Err(NsfError::BadFileSignature {
69                observed: [bytes[0], 0],
70            });
71        }
72        let header_size = bytes[1];
73        let size = u32::from_le_bytes([bytes[6], bytes[7], bytes[8], bytes[9]]);
74        let modification_time = Timedate::from_bytes(&bytes[10..18])?;
75        let checksum = u32::from_le_bytes([bytes[40], bytes[41], bytes[42], bytes[43]]);
76        let number_of_slots =
77            u32::from_le_bytes([bytes[44], bytes[45], bytes[46], bytes[47]]);
78        let footer_size =
79            u32::from_le_bytes([bytes[50], bytes[51], bytes[52], bytes[53]]);
80        Ok(Self {
81            header_size,
82            size,
83            modification_time,
84            checksum,
85            number_of_slots,
86            footer_size,
87        })
88    }
89}
90
/// A single record of the bucket's slot-index table, giving the
/// location and length of one slot's data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BucketSlot {
    /// Where the slot's data begins, counted in bytes from the start of
    /// the bucket — the same buffer the bucket header was parsed from.
    /// Per `libnsfdb_bucket_read_slots` the data is addressed as
    /// `bucket_data[offset]`; it is not relative to the end of the
    /// header.
    pub offset: u16,
    /// Length of the slot's data in bytes.
    pub size: u16,
}
103
104/// A borrowed view over a single bucket: the parsed header plus the
105/// backing bytes, with slot-table resolution.
106///
107/// Bucket layout, reverse-engineered from `libnsfdb_bucket.c`:
108///
109/// ```text
110/// +------------------+ offset 0
111/// | header (66 B)    |
112/// +------------------+
113/// | slot data ...    |  slot bytes live here, addressed by absolute
114/// |                  |  offset from the bucket start
115/// +------------------+
116/// | slot-index table |  number_of_slots entries, 4 bytes each,
117/// |                  |  stored BACK-TO-FRONT: slot 0 occupies the last
118/// |                  |  4 bytes before the footer. Each entry is
119/// |                  |  [u16 offset][u16 size] (offset at the lower
120/// |                  |  address, size at the higher).
121/// +------------------+ size - footer_size
122/// | footer           |
123/// +------------------+ size
124/// ```
125///
126/// Slot indices on disk are **1-based** (`libnsfdb_bucket_get_slot`
127/// rejects index 0 and looks up `slot_index - 1`); RRV bucket-slot
128/// entries carry the 1-based value, so [`Bucket::slot`] takes it as-is.
129#[derive(Debug)]
130pub struct Bucket<'a> {
131    header: BucketHeader,
132    /// Bytes from the bucket start, bounded to the bucket's declared
133    /// `size` (or the available tail, whichever is shorter).
134    bytes: &'a [u8],
135}
136
137impl<'a> Bucket<'a> {
138    /// Parse a bucket from a buffer positioned at the bucket's file
139    /// offset. The buffer may extend past the bucket (e.g. it is the
140    /// remainder of the whole file); this constructor clamps the view to
141    /// the bucket's declared `size` so slot resolution cannot read into a
142    /// neighbouring structure.
143    pub fn parse(bytes: &'a [u8]) -> Result<Self, NsfError> {
144        let header = BucketHeader::parse(bytes)?;
145        let declared = header.size as usize;
146        // Clamp to declared size when the buffer is longer; tolerate a
147        // buffer shorter than declared (truncated tail) by keeping what
148        // we have - slot accessors bounds-check individually.
149        let end = declared.min(bytes.len());
150        Ok(Self {
151            header,
152            bytes: &bytes[..end],
153        })
154    }
155
156    /// The parsed bucket header.
157    pub fn header(&self) -> &BucketHeader {
158        &self.header
159    }
160
161    /// Number of slots the bucket declares.
162    pub fn slot_count(&self) -> u32 {
163        self.header.number_of_slots
164    }
165
166    /// Resolve the slot-index entry for a 1-based `slot_index`.
167    ///
168    /// Returns [`NsfError::SlotIndexOutOfRange`] if the index is zero or
169    /// beyond the declared slot count, or [`NsfError::TooShort`] if the
170    /// bucket buffer does not actually contain the slot-index table the
171    /// header advertises (truncated / corrupt bucket).
172    pub fn slot_entry(&self, slot_index: u16) -> Result<BucketSlot, NsfError> {
173        let count = self.header.number_of_slots;
174        if slot_index == 0 || u32::from(slot_index) > count {
175            return Err(NsfError::SlotIndexOutOfRange {
176                requested: slot_index,
177                available: count,
178            });
179        }
180        // The slot-index table sits immediately below the footer and
181        // grows downward: slot 0 is the last 4 bytes before the footer.
182        let table_end = (self.bytes.len()).saturating_sub(self.header.footer_size as usize);
183        // Entry for the (1-based) slot: 0-based ordinal is slot_index - 1.
184        let ordinal = (slot_index - 1) as usize;
185        // Lower bound of this entry within the table.
186        let entry_base = match table_end.checked_sub(4 * (ordinal + 1)) {
187            Some(b) => b,
188            None => {
189                return Err(NsfError::TooShort {
190                    actual: self.bytes.len(),
191                    required: 4 * (ordinal + 1),
192                })
193            }
194        };
195        let entry = self.bytes.get(entry_base..entry_base + 4).ok_or(NsfError::TooShort {
196            actual: self.bytes.len(),
197            required: entry_base + 4,
198        })?;
199        let offset = u16::from_le_bytes([entry[0], entry[1]]);
200        let size = u16::from_le_bytes([entry[2], entry[3]]);
201        Ok(BucketSlot { offset, size })
202    }
203
204    /// Return the raw bytes of the slot at the 1-based `slot_index`.
205    ///
206    /// Bounds-checks the slot's `(offset, size)` against the bucket so a
207    /// corrupt slot-index table cannot read out of bounds; returns
208    /// [`NsfError::TooShort`] in that case.
209    pub fn slot(&self, slot_index: u16) -> Result<&'a [u8], NsfError> {
210        let BucketSlot { offset, size } = self.slot_entry(slot_index)?;
211        let start = offset as usize;
212        let end = start + size as usize;
213        self.bytes.get(start..end).ok_or(NsfError::TooShort {
214            actual: self.bytes.len(),
215            required: end,
216        })
217    }
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    /// Store `value` little-endian in the four bytes starting at `at`.
    fn put_u32(buf: &mut [u8], at: usize, value: u32) {
        buf[at..at + 4].copy_from_slice(&value.to_le_bytes());
    }

    /// Store `value` little-endian in the two bytes starting at `at`.
    fn put_u16(buf: &mut [u8], at: usize, value: u16) {
        buf[at..at + 2].copy_from_slice(&value.to_le_bytes());
    }

    /// Zero-filled buffer of `len` bytes whose header carries the given
    /// declared size, slot count and footer size. The modification
    /// time is left as zeroes.
    fn buffer_with_header(len: usize, size: u32, slots: u32, footer: u32) -> Vec<u8> {
        let mut buf = vec![0u8; len];
        buf[0] = BUCKET_SIGNATURE;
        buf[1] = 0x42;
        put_u32(&mut buf, 6, size);
        put_u32(&mut buf, 44, slots);
        put_u32(&mut buf, 50, footer);
        buf
    }

    /// Header-only fixture: 128 bytes held, 4096 declared, 100 slots.
    fn synthetic_bucket() -> Vec<u8> {
        buffer_with_header(128, 4096, 100, 12)
    }

    #[test]
    fn parses_synthetic_bucket() {
        let BucketHeader {
            header_size,
            size,
            number_of_slots,
            footer_size,
            ..
        } = BucketHeader::parse(&synthetic_bucket()).unwrap();
        assert_eq!(header_size, 0x42);
        assert_eq!(size, 4096);
        assert_eq!(number_of_slots, 100);
        assert_eq!(footer_size, 12);
    }

    #[test]
    fn rejects_bad_signature() {
        let mut buf = synthetic_bucket();
        buf[0] = 0xFF;
        assert!(BucketHeader::parse(&buf).is_err());
    }

    /// Two-slot fixture exercising the back-to-front slot-index table.
    ///
    /// Layout (size = 100, footer = 12); slots are named by their
    /// 1-based index as accepted by `Bucket::slot`:
    /// - header        : [0, 66)
    /// - slot 1 data   : [66, 70)  = 4 bytes of 0xAA
    /// - slot 2 data   : [70, 73)  = 3 bytes of 0xBB
    /// - slot 2 entry  : [80, 84)
    /// - slot 1 entry  : [84, 88)  (first entry sits right below the footer)
    /// - footer        : [88, 100)
    fn synthetic_bucket_with_slots() -> Vec<u8> {
        let mut buf = buffer_with_header(100, 100, 2, 12);
        buf[66..70].fill(0xAA);
        buf[70..73].fill(0xBB);
        // Table ends at 100 - 12 = 88 and grows downward from there.
        put_u16(&mut buf, 84, 66); // slot 1 offset
        put_u16(&mut buf, 86, 4); // slot 1 size
        put_u16(&mut buf, 80, 70); // slot 2 offset
        put_u16(&mut buf, 82, 3); // slot 2 size
        buf
    }

    #[test]
    fn bucket_resolves_one_based_slots() {
        let buf = synthetic_bucket_with_slots();
        let bucket = Bucket::parse(&buf).unwrap();
        assert_eq!(bucket.slot_count(), 2);
        // Index 1 addresses the first slot.
        assert_eq!(bucket.slot(1).unwrap(), &[0xAA; 4]);
        assert_eq!(bucket.slot(2).unwrap(), &[0xBB; 3]);
    }

    #[test]
    fn bucket_rejects_slot_index_zero() {
        let buf = synthetic_bucket_with_slots();
        let bucket = Bucket::parse(&buf).unwrap();
        assert!(matches!(
            bucket.slot(0),
            Err(NsfError::SlotIndexOutOfRange {
                requested: 0,
                available: 2
            })
        ));
    }

    #[test]
    fn bucket_rejects_slot_index_past_end() {
        let buf = synthetic_bucket_with_slots();
        let bucket = Bucket::parse(&buf).unwrap();
        assert!(matches!(
            bucket.slot(3),
            Err(NsfError::SlotIndexOutOfRange {
                requested: 3,
                available: 2
            })
        ));
    }

    #[test]
    fn bucket_clamps_view_to_declared_size() {
        let mut buf = synthetic_bucket_with_slots();
        // Bytes of a "neighbouring" structure follow the bucket; the
        // view must stop at the declared 100 bytes and ignore them.
        buf.extend_from_slice(&[0x99; 64]);
        let bucket = Bucket::parse(&buf).unwrap();
        assert_eq!(bucket.slot(2).unwrap(), &[0xBB; 3]);
    }

    #[test]
    fn bucket_slot_with_corrupt_offset_errors_not_panics() {
        let mut buf = synthetic_bucket_with_slots();
        // Slot 1's entry now points past the end of the bucket.
        put_u16(&mut buf, 84, 250);
        let bucket = Bucket::parse(&buf).unwrap();
        assert!(matches!(bucket.slot(1), Err(NsfError::TooShort { .. })));
    }

    #[test]
    fn bucket_slot_with_corrupt_size_overflow_errors_not_panics() {
        let mut buf = synthetic_bucket_with_slots();
        // Slot 1's offset (66) stays valid but its size overruns the
        // bucket: the check must cover offset + size, not offset alone.
        put_u16(&mut buf, 86, 250);
        let bucket = Bucket::parse(&buf).unwrap();
        assert!(matches!(bucket.slot(1), Err(NsfError::TooShort { .. })));
    }

    #[test]
    fn zero_slot_bucket_reports_no_slots() {
        let mut buf = synthetic_bucket_with_slots();
        // Overwrite the declared slot count with zero.
        put_u32(&mut buf, 44, 0);
        let bucket = Bucket::parse(&buf).unwrap();
        assert_eq!(bucket.slot_count(), 0);
        assert!(matches!(
            bucket.slot(1),
            Err(NsfError::SlotIndexOutOfRange {
                requested: 1,
                available: 0
            })
        ));
    }
}
364}