// sherlock_nsf_parser/bucket.rs
//! Generic bucket (and bucket-shaped page) parsing.
//!
//! A bucket is the unit of allocation inside an NSF file. The DBINFO
//! header reports the size of summary buckets and RRV buckets
//! separately. Every bucket starts with a `nsfdb_bucket_header`
//! (signature 0x02, 66 bytes including footer-pointer fields) and
//! optionally ends with a `nsfdb_bucket_footer`. Note records and
//! summary item data live inside buckets, indexed by slot.
//!
//! Layout per `libnsfdb/nsfdb_bucket.h`:
//!
//! ```text
//! offset  width  field
//!      0      1  signature (0x02)
//!      1      1  header_size
//!      2      4  unknown1
//!      6      4  size
//!     10      8  modification_time (TIMEDATE)
//!     18     20  unknown2
//!     38      2  unknown3
//!     40      4  checksum
//!     44      4  number_of_slots
//!     48      2  unknown4
//!     50      4  footer_size
//!     54     12  unknown5
//! ```
27
28use crate::error::NsfError;
29use crate::time::Timedate;
30
/// Signature byte found at offset 0 of every bucket.
pub const BUCKET_SIGNATURE: u8 = 0x02;
/// Minimum number of bytes a bucket header occupies on disk: the
/// offset of the trailing `unknown5` field (54) plus its width (12).
pub const BUCKET_HEADER_BYTES: usize = 54 + 12;
35
36/// Parsed bucket header. Field naming matches the libnsfdb struct.
37#[derive(Debug, Clone, Copy, PartialEq, Eq)]
38pub struct BucketHeader {
39 /// Header size as declared on disk (typically 0x42 = 66 for modern
40 /// ODS). Surfaced for diagnostics; consumers should use
41 /// [`BUCKET_HEADER_BYTES`] when offsetting into bucket data.
42 pub header_size: u8,
43 /// Total bucket size in bytes (including header + slots + footer).
44 pub size: u32,
45 /// Most recent modification time.
46 pub modification_time: Timedate,
47 /// XOR-32 checksum of the header.
48 pub checksum: u32,
49 /// Number of slots in this bucket. Each slot holds one allocated
50 /// record (a note record in a non-summary bucket; summary-item
51 /// data in a summary bucket).
52 pub number_of_slots: u32,
53 /// Footer size in bytes.
54 pub footer_size: u32,
55}
56
57impl BucketHeader {
58 /// Parse a bucket header from the first 66 bytes of bucket data.
59 /// Errors on signature mismatch or short input.
60 pub fn parse(bytes: &[u8]) -> Result<Self, NsfError> {
61 if bytes.len() < BUCKET_HEADER_BYTES {
62 return Err(NsfError::TooShort {
63 actual: bytes.len(),
64 required: BUCKET_HEADER_BYTES,
65 });
66 }
67 if bytes[0] != BUCKET_SIGNATURE {
68 return Err(NsfError::BadFileSignature {
69 observed: [bytes[0], 0],
70 });
71 }
72 let header_size = bytes[1];
73 let size = u32::from_le_bytes([bytes[6], bytes[7], bytes[8], bytes[9]]);
74 let modification_time = Timedate::from_bytes(&bytes[10..18])?;
75 let checksum = u32::from_le_bytes([bytes[40], bytes[41], bytes[42], bytes[43]]);
76 let number_of_slots =
77 u32::from_le_bytes([bytes[44], bytes[45], bytes[46], bytes[47]]);
78 let footer_size =
79 u32::from_le_bytes([bytes[50], bytes[51], bytes[52], bytes[53]]);
80 Ok(Self {
81 header_size,
82 size,
83 modification_time,
84 checksum,
85 number_of_slots,
86 footer_size,
87 })
88 }
89}
90
/// A single entry of a bucket's slot-index table, giving the location
/// and length of one slot's bytes within the bucket.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BucketSlot {
    /// Where the slot's data begins, in bytes from the start of the
    /// bucket (the same buffer the bucket header was parsed from).
    /// Per `libnsfdb_bucket_read_slots` the data is addressed as
    /// `bucket_data[offset]`; it is not relative to the end of the
    /// header.
    pub offset: u16,
    /// Length of the slot's data in bytes.
    pub size: u16,
}
103
104/// A borrowed view over a single bucket: the parsed header plus the
105/// backing bytes, with slot-table resolution.
106///
107/// Bucket layout, reverse-engineered from `libnsfdb_bucket.c`:
108///
109/// ```text
110/// +------------------+ offset 0
111/// | header (66 B) |
112/// +------------------+
113/// | slot data ... | slot bytes live here, addressed by absolute
114/// | | offset from the bucket start
115/// +------------------+
116/// | slot-index table | number_of_slots entries, 4 bytes each,
117/// | | stored BACK-TO-FRONT: slot 0 occupies the last
118/// | | 4 bytes before the footer. Each entry is
119/// | | [u16 offset][u16 size] (offset at the lower
120/// | | address, size at the higher).
121/// +------------------+ size - footer_size
122/// | footer |
123/// +------------------+ size
124/// ```
125///
126/// Slot indices on disk are **1-based** (`libnsfdb_bucket_get_slot`
127/// rejects index 0 and looks up `slot_index - 1`); RRV bucket-slot
128/// entries carry the 1-based value, so [`Bucket::slot`] takes it as-is.
129#[derive(Debug)]
130pub struct Bucket<'a> {
131 header: BucketHeader,
132 /// Bytes from the bucket start, bounded to the bucket's declared
133 /// `size` (or the available tail, whichever is shorter).
134 bytes: &'a [u8],
135}
136
137impl<'a> Bucket<'a> {
138 /// Parse a bucket from a buffer positioned at the bucket's file
139 /// offset. The buffer may extend past the bucket (e.g. it is the
140 /// remainder of the whole file); this constructor clamps the view to
141 /// the bucket's declared `size` so slot resolution cannot read into a
142 /// neighbouring structure.
143 pub fn parse(bytes: &'a [u8]) -> Result<Self, NsfError> {
144 let header = BucketHeader::parse(bytes)?;
145 let declared = header.size as usize;
146 // Clamp to declared size when the buffer is longer; tolerate a
147 // buffer shorter than declared (truncated tail) by keeping what
148 // we have - slot accessors bounds-check individually.
149 let end = declared.min(bytes.len());
150 Ok(Self {
151 header,
152 bytes: &bytes[..end],
153 })
154 }
155
156 /// The parsed bucket header.
157 pub fn header(&self) -> &BucketHeader {
158 &self.header
159 }
160
161 /// Number of slots the bucket declares.
162 pub fn slot_count(&self) -> u32 {
163 self.header.number_of_slots
164 }
165
166 /// Resolve the slot-index entry for a 1-based `slot_index`.
167 ///
168 /// Returns [`NsfError::SlotIndexOutOfRange`] if the index is zero or
169 /// beyond the declared slot count, or [`NsfError::TooShort`] if the
170 /// bucket buffer does not actually contain the slot-index table the
171 /// header advertises (truncated / corrupt bucket).
172 pub fn slot_entry(&self, slot_index: u16) -> Result<BucketSlot, NsfError> {
173 let count = self.header.number_of_slots;
174 if slot_index == 0 || u32::from(slot_index) > count {
175 return Err(NsfError::SlotIndexOutOfRange {
176 requested: slot_index,
177 available: count,
178 });
179 }
180 // The slot-index table sits immediately below the footer and
181 // grows downward: slot 0 is the last 4 bytes before the footer.
182 let table_end = (self.bytes.len()).saturating_sub(self.header.footer_size as usize);
183 // Entry for the (1-based) slot: 0-based ordinal is slot_index - 1.
184 let ordinal = (slot_index - 1) as usize;
185 // Lower bound of this entry within the table.
186 let entry_base = match table_end.checked_sub(4 * (ordinal + 1)) {
187 Some(b) => b,
188 None => {
189 return Err(NsfError::TooShort {
190 actual: self.bytes.len(),
191 required: 4 * (ordinal + 1),
192 })
193 }
194 };
195 let entry = self.bytes.get(entry_base..entry_base + 4).ok_or(NsfError::TooShort {
196 actual: self.bytes.len(),
197 required: entry_base + 4,
198 })?;
199 let offset = u16::from_le_bytes([entry[0], entry[1]]);
200 let size = u16::from_le_bytes([entry[2], entry[3]]);
201 Ok(BucketSlot { offset, size })
202 }
203
204 /// Return the raw bytes of the slot at the 1-based `slot_index`.
205 ///
206 /// Bounds-checks the slot's `(offset, size)` against the bucket so a
207 /// corrupt slot-index table cannot read out of bounds; returns
208 /// [`NsfError::TooShort`] in that case.
209 pub fn slot(&self, slot_index: u16) -> Result<&'a [u8], NsfError> {
210 let BucketSlot { offset, size } = self.slot_entry(slot_index)?;
211 let start = offset as usize;
212 let end = start + size as usize;
213 self.bytes.get(start..end).ok_or(NsfError::TooShort {
214 actual: self.bytes.len(),
215 required: end,
216 })
217 }
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    /// Store `value` little-endian at `raw[at..at + 4]`.
    fn put_u32(raw: &mut [u8], at: usize, value: u32) {
        raw[at..at + 4].copy_from_slice(&value.to_le_bytes());
    }

    /// Store `value` little-endian at `raw[at..at + 2]`.
    fn put_u16(raw: &mut [u8], at: usize, value: u16) {
        raw[at..at + 2].copy_from_slice(&value.to_le_bytes());
    }

    /// Header-only fixture: 128 zeroed bytes carrying a valid signature,
    /// header_size 0x42, size 4096, 100 slots and a 12-byte footer. The
    /// modification timedate is left zeroed.
    fn synthetic_bucket() -> Vec<u8> {
        let mut raw = vec![0u8; 128];
        raw[0] = BUCKET_SIGNATURE;
        raw[1] = 0x42;
        put_u32(&mut raw, 6, 4096); // size
        put_u32(&mut raw, 44, 100); // number_of_slots
        put_u32(&mut raw, 50, 12); // footer_size
        raw
    }

    #[test]
    fn parses_synthetic_bucket() {
        let raw = synthetic_bucket();
        let header = BucketHeader::parse(&raw).unwrap();
        assert_eq!(header.header_size, 0x42);
        assert_eq!(header.size, 4096);
        assert_eq!(header.number_of_slots, 100);
        assert_eq!(header.footer_size, 12);
    }

    #[test]
    fn rejects_bad_signature() {
        let mut raw = synthetic_bucket();
        raw[0] = 0xFF;
        assert!(BucketHeader::parse(&raw).is_err());
    }

    /// Two-slot fixture exercising the back-to-front slot-index table
    /// end to end.
    ///
    /// Layout (size = 100, footer = 12):
    /// - header       : [0, 66)
    /// - first slot   : [66, 70) = 4 bytes of 0xAA (slot index 1)
    /// - second slot  : [70, 73) = 3 bytes of 0xBB (slot index 2)
    /// - table        : [80, 88) second slot's entry, then the first's
    /// - footer       : [88, 100)
    fn synthetic_bucket_with_slots() -> Vec<u8> {
        let mut raw = vec![0u8; 100];
        raw[0] = BUCKET_SIGNATURE;
        raw[1] = 0x42;
        put_u32(&mut raw, 6, 100); // size
        put_u32(&mut raw, 44, 2); // number_of_slots
        put_u32(&mut raw, 50, 12); // footer_size

        // Slot data.
        raw[66..70].copy_from_slice(&[0xAA; 4]);
        raw[70..73].copy_from_slice(&[0xBB; 3]);

        // Slot-index table, back-to-front; it ends at 100 - 12 = 88.
        // First slot's entry at [84, 88): offset = 66, size = 4.
        put_u16(&mut raw, 84, 66);
        put_u16(&mut raw, 86, 4);
        // Second slot's entry at [80, 84): offset = 70, size = 3.
        put_u16(&mut raw, 80, 70);
        put_u16(&mut raw, 82, 3);
        raw
    }

    #[test]
    fn bucket_resolves_one_based_slots() {
        let raw = synthetic_bucket_with_slots();
        let bucket = Bucket::parse(&raw).unwrap();
        assert_eq!(bucket.slot_count(), 2);
        // Indices are 1-based, so index 1 names the first slot.
        assert_eq!(bucket.slot(1).unwrap(), &[0xAA; 4]);
        assert_eq!(bucket.slot(2).unwrap(), &[0xBB; 3]);
    }

    #[test]
    fn bucket_rejects_slot_index_zero() {
        let raw = synthetic_bucket_with_slots();
        let bucket = Bucket::parse(&raw).unwrap();
        assert!(matches!(
            bucket.slot(0),
            Err(NsfError::SlotIndexOutOfRange {
                requested: 0,
                available: 2
            })
        ));
    }

    #[test]
    fn bucket_rejects_slot_index_past_end() {
        let raw = synthetic_bucket_with_slots();
        let bucket = Bucket::parse(&raw).unwrap();
        assert!(matches!(
            bucket.slot(3),
            Err(NsfError::SlotIndexOutOfRange {
                requested: 3,
                available: 2
            })
        ));
    }

    #[test]
    fn bucket_clamps_view_to_declared_size() {
        let mut raw = synthetic_bucket_with_slots();
        // Bytes belonging to a "neighbouring" structure follow the
        // bucket; the view must stop before them.
        raw.extend_from_slice(&[0x99; 64]);
        let bucket = Bucket::parse(&raw).unwrap();
        // Resolution still happens against the clamped 100-byte view.
        assert_eq!(bucket.slot(2).unwrap(), &[0xBB; 3]);
    }

    #[test]
    fn bucket_slot_with_corrupt_offset_errors_not_panics() {
        let mut raw = synthetic_bucket_with_slots();
        // Move slot index 1's offset beyond the end of the bucket.
        put_u16(&mut raw, 84, 250);
        let bucket = Bucket::parse(&raw).unwrap();
        assert!(matches!(bucket.slot(1), Err(NsfError::TooShort { .. })));
    }

    #[test]
    fn bucket_slot_with_corrupt_size_overflow_errors_not_panics() {
        let mut raw = synthetic_bucket_with_slots();
        // Slot index 1 keeps its valid offset (66) but its size now
        // runs past the bucket end: offset + size must be checked, not
        // the offset alone.
        put_u16(&mut raw, 86, 250);
        let bucket = Bucket::parse(&raw).unwrap();
        assert!(matches!(bucket.slot(1), Err(NsfError::TooShort { .. })));
    }

    #[test]
    fn zero_slot_bucket_reports_no_slots() {
        let mut raw = synthetic_bucket_with_slots();
        // Overwrite the declared slot count with zero.
        put_u32(&mut raw, 44, 0);
        let bucket = Bucket::parse(&raw).unwrap();
        assert_eq!(bucket.slot_count(), 0);
        assert!(matches!(
            bucket.slot(1),
            Err(NsfError::SlotIndexOutOfRange {
                requested: 1,
                available: 0
            })
        ));
    }
}