sherlock_nsf_parser/database.rs
1//! High-level `Database::open` API.
2//!
3//! Pulls the file header + DBINFO together, then exposes the entry
4//! points for note enumeration. The actual RRV walk requires having
5//! the file mmapped or fully buffered; this layer keeps the byte
6//! window borrowed so consumers control I/O strategy.
7
8use crate::bdb::BucketDescriptorBlock;
9use crate::bucket::Bucket;
10use crate::cx;
11use crate::error::NsfError;
12use crate::header::DbHeader;
13use crate::info2::{Information2, INFO2_BYTES, INFO2_FILE_OFFSET};
14use crate::note::NoteHeader;
15use crate::rrv::{RrvBucketHeader, RrvEntry, RrvIter, RrvLocation};
16use crate::superblock::{select_freshest, Superblock, SUPERBLOCK_HEADER_BYTES};
17
/// Offset, inside the decompressed superblock body, of the resident
/// summary-descriptor page in a single-page database. This is the
/// libnsfdb-documented prefix, written out as its `4 + 10 + 10 + 200`
/// components. In a multi-page database the page index precedes the resident
/// page, which therefore starts at
/// `SUMMARY_RESIDENT_PREFIX + (pages - 1) * SUMMARY_DESCRIPTOR_BYTES`.
const SUMMARY_RESIDENT_PREFIX: usize = 4 + 10 + 10 + 200;
/// Stride of one on-disk summary bucket descriptor (`file_position[4] +
/// modification_time[8] + 2 free-byte fields`).
const SUMMARY_DESCRIPTOR_BYTES: usize = 14;
/// Size of the header in front of the descriptor array of an *out-of-body*
/// summary descriptor page (a page referenced from the body's page index).
/// The value is empirical (validated to 99.3% against the fakenames identity
/// oracle); see the `nsf_b2_addressing_cracked` engineering note. The in-body
/// resident page does not use it - that page is located with
/// [`SUMMARY_RESIDENT_PREFIX`].
const OUT_OF_BODY_PAGE_HEADER: usize = 250;
/// How many bucket descriptors one out-of-body summary page holds. Also
/// empirical: with this value the resident page base lands at
/// `(pages-1)*PER_OUT_OF_BODY_PAGE + 1`, exactly matching the observed
/// bucket_index range. The resident page is sized separately, from
/// `Superblock::number_of_summary_buckets`.
const PER_OUT_OF_BODY_PAGE: usize = 567;
37
/// Read a little-endian `u32` from `buf` at byte `offset`.
///
/// Returns `None` when the four bytes do not lie wholly inside `buf`. That
/// includes offsets so large that `offset + 4` would overflow `usize`: the
/// end of the window is computed with `checked_add`, so a hostile offset
/// can neither panic (debug builds) nor wrap around (release builds).
fn read_u32_le(buf: &[u8], offset: usize) -> Option<u32> {
    let end = offset.checked_add(4)?;
    match *buf.get(offset..end)? {
        [b0, b1, b2, b3] => Some(u32::from_le_bytes([b0, b1, b2, b3])),
        // Unreachable: a `offset..offset + 4` window is always 4 bytes long.
        _ => None,
    }
}
42
43/// Top-level handle to a buffered NSF file.
44///
45/// Holds a borrowed slice of the full file bytes. Cheap to construct -
46/// no copies are made. The parser walks the file lazily; consumers pay
47/// for what they enumerate.
48#[derive(Debug)]
49pub struct Database<'a> {
50 bytes: &'a [u8],
51 header: DbHeader,
52}
53
54impl<'a> Database<'a> {
55 /// Open an NSF from a full-file byte buffer. Validates the file
56 /// header and DBINFO; lazy on everything else.
57 pub fn open(bytes: &'a [u8]) -> Result<Self, NsfError> {
58 let header = DbHeader::parse(bytes)?;
59 Ok(Self { bytes, header })
60 }
61
62 /// Parsed database header.
63 pub fn header(&self) -> &DbHeader {
64 &self.header
65 }
66
67 /// True when the database carries a populated data RRV bucket. A
68 /// fresh / never-instantiated template will return false here -
69 /// it has design notes via the non-data RRV but no data notes.
70 pub fn has_data_rrv(&self) -> bool {
71 self.header.data_rrv_bucket_position != 0
72 }
73
74 /// Parse + iterate the data RRV bucket if present. Returns the
75 /// bucket header for diagnostics plus an iterator over the
76 /// non-empty RRV entries.
77 ///
78 /// The data RRV bucket's file position is reported in 256-byte
79 /// units in DBINFO; this method converts to a byte offset and
80 /// reads `rrv_bucket_size` bytes from that point.
81 pub fn data_rrv_iter(&self) -> Result<Option<(RrvBucketHeader, RrvIter<'a>)>, NsfError> {
82 if !self.has_data_rrv() {
83 return Ok(None);
84 }
85 let byte_offset = u64::from(self.header.data_rrv_bucket_position) * 256;
86 let bucket_size = self.header.rrv_bucket_size as u64;
87 let end = byte_offset.saturating_add(bucket_size);
88 if end > self.bytes.len() as u64 {
89 return Err(NsfError::TooShort {
90 actual: self.bytes.len(),
91 required: end as usize,
92 });
93 }
94 let bucket = &self.bytes[byte_offset as usize..end as usize];
95 let (header, iter) = RrvIter::new(bucket)?;
96 Ok(Some((header, iter)))
97 }
98
99 /// Convenience: count non-empty entries in the data RRV. Walks the
100 /// bucket but does not retain the per-entry state.
101 pub fn data_note_count(&self) -> Result<u64, NsfError> {
102 let Some((_, iter)) = self.data_rrv_iter()? else {
103 return Ok(0);
104 };
105 Ok(iter.count() as u64)
106 }
107
108 /// True when the database carries a populated non-data RRV bucket.
109 /// Design notes (forms, views) and, in databases like `fakenames.nsf`,
110 /// the bulk of document notes are reached through the non-data RRV
111 /// rather than the data RRV.
112 pub fn has_non_data_rrv(&self) -> bool {
113 self.header.non_data_rrv_bucket_position != 0
114 }
115
116 /// Parse + iterate the non-data RRV bucket if present. Mirrors
117 /// [`Self::data_rrv_iter`] but reads from
118 /// `non_data_rrv_bucket_position`. Most bucket-slot RRV entries (the
119 /// ones [`Self::resolve_bucket_slot`] resolves) live here.
120 pub fn non_data_rrv_iter(&self) -> Result<Option<(RrvBucketHeader, RrvIter<'a>)>, NsfError> {
121 if !self.has_non_data_rrv() {
122 return Ok(None);
123 }
124 let byte_offset = u64::from(self.header.non_data_rrv_bucket_position) * 256;
125 let bucket_size = self.header.rrv_bucket_size as u64;
126 let end = byte_offset.saturating_add(bucket_size);
127 if end > self.bytes.len() as u64 {
128 return Err(NsfError::TooShort {
129 actual: self.bytes.len(),
130 required: end as usize,
131 });
132 }
133 let bucket = &self.bytes[byte_offset as usize..end as usize];
134 let (header, iter) = RrvIter::new(bucket)?;
135 Ok(Some((header, iter)))
136 }
137
138 /// Collect at most `limit` RRV entries from the data RRV for
139 /// preview / list rendering. Useful for "show the first 200 notes
140 /// in the viewer" without walking 40,000 entries up front.
141 pub fn data_rrv_take(&self, limit: usize) -> Result<Vec<RrvEntry>, NsfError> {
142 let Some((_, iter)) = self.data_rrv_iter()? else {
143 return Ok(Vec::new());
144 };
145 Ok(iter.take(limit).collect())
146 }
147
148 /// Parse the database information extension block 2 (file offset 520,
149 /// 124 bytes). Carries the 4 superblock positions + 2 BDB positions
150 /// plus bucket-size knobs.
151 pub fn information2(&self) -> Result<Information2, NsfError> {
152 let end = INFO2_FILE_OFFSET + INFO2_BYTES;
153 if self.bytes.len() < end {
154 return Err(NsfError::TooShort {
155 actual: self.bytes.len(),
156 required: end,
157 });
158 }
159 Information2::parse(&self.bytes[INFO2_FILE_OFFSET..end])
160 }
161
162 /// Parse every populated superblock copy (skipping uninitialized
163 /// slots). Each entry is `(slot_index, Superblock)` so callers can
164 /// report which copy was loaded. Domino allocates 4 slots and rotates
165 /// commits across them; instantiated databases typically have 3
166 /// populated and 1 empty, with the freshest by `modification_time`
167 /// authoritative (use [`Self::freshest_superblock`]).
168 ///
169 /// Forensic-tool-grade resilience: slots are skipped silently when
170 /// any of these conditions hold, rather than crashing the load:
171 ///
172 /// - Slot is empty (position or size zero).
173 /// - Slot's declared byte offset extends past the file end.
174 /// - Slot's body does not start with the superblock signature
175 /// `0E 00`. This catches fresh-template uninitialized regions
176 /// that Domino allocates with `allocation_granularity` but never
177 /// commits to (empirically these are filled with `AA AA AA AA`,
178 /// e.g. SB3 of `comparedbs.ntf`).
179 ///
180 /// Other parse failures (e.g. unexpected short read mid-header) are
181 /// not expected in practice with a fully-buffered NSF and would
182 /// surface as errors. The 3-redundant-copy WAL guarantees that
183 /// silently dropping an unreadable slot leaves at least one valid
184 /// copy.
185 pub fn superblocks(&self) -> Result<Vec<(usize, Superblock)>, NsfError> {
186 let info = self.information2()?;
187 let mut out = Vec::with_capacity(4);
188 for (i, slot) in info.superblocks.iter().enumerate() {
189 let Some(byte_offset) = slot.byte_offset() else {
190 continue;
191 };
192 let start = byte_offset as usize;
193 let end = start.saturating_add(SUPERBLOCK_HEADER_BYTES);
194 if end > self.bytes.len() {
195 continue;
196 }
197 match Superblock::parse(&self.bytes[start..end]) {
198 Ok(sb) => out.push((i, sb)),
199 Err(NsfError::BadSubrecordSignature { .. }) => {
200 // Uninitialized / 0xAA-filled region. Skip silently.
201 }
202 Err(other) => return Err(other),
203 }
204 }
205 Ok(out)
206 }
207
208 /// Convenience: parse all populated superblocks and return the
209 /// freshest one by `modification_time`. The other three copies are
210 /// write-ahead-log redundancy and should be ignored once this one
211 /// is loaded. Returns `None` if no superblock slots are populated
212 /// (extremely rare; would indicate a partially-initialized NSF).
213 pub fn freshest_superblock(&self) -> Result<Option<(usize, Superblock)>, NsfError> {
214 let all = self.superblocks()?;
215 Ok(select_freshest(&all))
216 }
217
218 /// Decompress the freshest superblock's body (the CX-compressed region
219 /// that carries the bucket-descriptor array). Returns `None` when the
220 /// database has no superblock.
221 ///
222 /// Body layout from the superblock byte offset, per the reference:
223 /// `[0,100)` header, then the compressed region of length
224 /// `size - 112` (100-byte header + 12-byte footer removed), of which
225 /// the first 4 bytes are a prefix the decompressor skips. The
226 /// decompressed length is the header's `uncompressed_size` field.
227 pub fn decompressed_superblock_body(&self) -> Result<Option<Vec<u8>>, NsfError> {
228 let Some((slot, sb)) = self.freshest_superblock()? else {
229 return Ok(None);
230 };
231 let info = self.information2()?;
232 let Some(sb_offset) = info.superblocks.get(slot).and_then(|s| s.byte_offset()) else {
233 return Ok(None);
234 };
235 let size = sb.size as usize;
236 // Need at least header (100) + footer (12) + the 4-byte prefix.
237 if size < SUPERBLOCK_HEADER_BYTES + 12 + 4 {
238 return Err(NsfError::DecompressionFailed {
239 detail: "superblock size too small to hold a compressed body",
240 });
241 }
242 let region_start = sb_offset as usize + SUPERBLOCK_HEADER_BYTES;
243 let region_len = size - SUPERBLOCK_HEADER_BYTES - 12;
244 // The body is a chain of length-prefixed CX segments (the leading 4
245 // bytes are the first segment's compressed length). Single-segment
246 // bodies - the common superblock case - decode identically.
247 let region_end = region_start + region_len;
248 let region = self.bytes.get(region_start..region_end).ok_or(NsfError::TooShort {
249 actual: self.bytes.len(),
250 required: region_end,
251 })?;
252 let body = cx::decompress_chained(region, sb.uncompressed_size as usize)?;
253 Ok(Some(body))
254 }
255
256 /// Build the global summary-bucket descriptor map: a 0-based vector of
257 /// file byte offsets where `offsets[bucket_index - 1]` is the byte
258 /// offset of the summary bucket an RRV bucket-slot entry's
259 /// `bucket_index` refers to (`bucket_index` is 1-based on disk).
260 ///
261 /// # Multi-page geometry
262 ///
263 /// On modern ODS the summary bucket descriptors are spread across
264 /// `number_of_summary_bucket_descriptor_pages` pages. The decompressed
265 /// superblock body begins with a page index of `(pages - 1)` stride-14
266 /// records (the page's `file_position` is the first 4 bytes of each
267 /// record); those point to the out-of-body pages. The final (resident)
268 /// page's descriptor array is inline in the body at
269 /// `SUMMARY_RESIDENT_PREFIX + (pages - 1) * SUMMARY_DESCRIPTOR_BYTES`.
270 /// Single-page databases (`pages <= 1`) have only the resident page at
271 /// the libnsfdb-documented offset 224.
272 ///
273 /// libnsfdb itself only handles a single descriptor page (it errors on
274 /// `> 1`), so the multi-page geometry here was reverse-engineered and
275 /// validated against the `rrv_identifier` identity oracle (see
276 /// [`Self::enumerate_notes`]). The out-of-body page header size
277 /// ([`OUT_OF_BODY_PAGE_HEADER`]) and per-page descriptor count
278 /// ([`PER_OUT_OF_BODY_PAGE`]) are empirical constants; mis-fits surface
279 /// as identity-gate failures in [`Self::enumerate_notes`] rather than as
280 /// silently wrong records.
281 pub fn summary_bucket_offsets(&self) -> Result<Vec<u64>, NsfError> {
282 Ok(self
283 .summary_bucket_raw_fps()?
284 .into_iter()
285 .map(|fp| u64::from(fp) << 8)
286 .collect())
287 }
288
289 /// The raw 4-byte `file_position` value of each summary bucket
290 /// descriptor, 0-based by `bucket_index`. The byte offset is
291 /// `fp << 8` (see [`Self::summary_bucket_offsets`]); the raw form is
292 /// retained because the rare group-marker slots carry flag bits inside
293 /// the `file_position` field that [`Self::enumerate_notes`] corrects.
294 fn summary_bucket_raw_fps(&self) -> Result<Vec<u32>, NsfError> {
295 let Some((_, sb)) = self.freshest_superblock()? else {
296 return Ok(Vec::new());
297 };
298 let Some(body) = self.decompressed_superblock_body()? else {
299 return Ok(Vec::new());
300 };
301 let pages = sb.number_of_summary_bucket_descriptor_pages as usize;
302 let n_page_ptrs = pages.saturating_sub(1);
303 let resident_count = sb.number_of_summary_buckets as usize;
304
305 let mut fps = Vec::new();
306
307 // Out-of-body pages, in page-index order.
308 for j in 0..n_page_ptrs {
309 let page_fp = read_u32_le(&body, j * SUMMARY_DESCRIPTOR_BYTES).unwrap_or(0);
310 let page_off = u64::from(page_fp) << 8;
311 for k in 0..PER_OUT_OF_BODY_PAGE {
312 let o = page_off as usize
313 + OUT_OF_BODY_PAGE_HEADER
314 + k * SUMMARY_DESCRIPTOR_BYTES;
315 fps.push(read_u32_le(self.bytes, o).unwrap_or(0));
316 }
317 }
318
319 // Resident page, inline in the decompressed body.
320 let resident_prefix = SUMMARY_RESIDENT_PREFIX + n_page_ptrs * SUMMARY_DESCRIPTOR_BYTES;
321 for k in 0..resident_count {
322 let o = resident_prefix + k * SUMMARY_DESCRIPTOR_BYTES;
323 fps.push(read_u32_le(&body, o).unwrap_or(0));
324 }
325
326 Ok(fps)
327 }
328
329 /// Resolve a single RRV bucket-slot pair to the raw bytes of the slot's
330 /// record, using the summary-bucket descriptor map.
331 ///
332 /// This is the physical resolution step: it does not identity-check the
333 /// result. For verified note enumeration (where each resolved record is
334 /// confirmed to carry the requested `rrv_identifier`), use
335 /// [`Self::enumerate_notes`]. Rebuilds the descriptor map on each call;
336 /// callers resolving many entries should prefer `enumerate_notes`, which
337 /// builds the map once.
338 pub fn resolve_bucket_slot(
339 &self,
340 bucket_index: u32,
341 slot_index: u16,
342 ) -> Result<&'a [u8], NsfError> {
343 let offsets = self.summary_bucket_offsets()?;
344 Self::resolve_in(self.bytes, &offsets, bucket_index, slot_index)
345 }
346
347 /// Resolve `bucket_index`/`slot_index` against a prebuilt descriptor map.
348 fn resolve_in(
349 bytes: &'a [u8],
350 offsets: &[u64],
351 bucket_index: u32,
352 slot_index: u16,
353 ) -> Result<&'a [u8], NsfError> {
354 let ordinal = (bucket_index as usize)
355 .checked_sub(1)
356 .ok_or(NsfError::BucketIndexOutOfRange {
357 requested: bucket_index,
358 available: offsets.len(),
359 })?;
360 let off = *offsets
361 .get(ordinal)
362 .ok_or(NsfError::BucketIndexOutOfRange {
363 requested: bucket_index,
364 available: offsets.len(),
365 })?;
366 let start = off as usize;
367 let bucket_bytes = bytes.get(start..).ok_or(NsfError::TooShort {
368 actual: bytes.len(),
369 required: start,
370 })?;
371 let bucket = Bucket::parse(bucket_bytes)?;
372 bucket.slot(slot_index)
373 }
374
375 /// Parse the freshest Bucket Descriptor Block (BDB) - the master index
376 /// of every RRV bucket in the database. Returns `None` when no BDB slot
377 /// is populated (a fresh / never-instantiated shell). Of the two BDB
378 /// copies in [`Information2`] (primary + write-ahead-log redundancy) the
379 /// one with the higher `write_count` is authoritative.
380 pub fn bucket_descriptor_block(&self) -> Result<Option<BucketDescriptorBlock>, NsfError> {
381 let info = self.information2()?;
382 let mut best: Option<BucketDescriptorBlock> = None;
383 for slot in &info.bdbs {
384 let Some(off) = slot.byte_offset() else {
385 continue;
386 };
387 match BucketDescriptorBlock::parse(self.bytes, off, slot.size_bytes) {
388 Ok(bdb) => {
389 if best.as_ref().map_or(true, |b| bdb.write_count > b.write_count) {
390 best = Some(bdb);
391 }
392 }
393 // A malformed / superseded BDB copy is skipped; the other
394 // copy is the WAL redundancy that covers it.
395 Err(_) => continue,
396 }
397 }
398 Ok(best)
399 }
400
401 /// Enumerate every note in the database by walking the BDB -> all RRV
402 /// buckets -> each RRV entry, resolving each to a note record.
403 ///
404 /// Every resolution is **identity-gated**: a note is only accepted if
405 /// the resolved record's `rrv_identifier` (note header offset 6) equals
406 /// the RRV entry's identifier. This is the chain-of-custody guarantee -
407 /// a record is never returned unless it provably is the note the RRV
408 /// entry points to. Entries that no candidate resolves under the gate
409 /// are counted in `unresolved` rather than returned as possibly-wrong
410 /// evidence.
411 ///
412 /// # Group-marker recovery
413 ///
414 /// A small set of summary-descriptor slots (the page's group-boundary
415 /// slots) carry group-marker flag bits inside the `file_position` field:
416 /// the low nibble, or bits 16-19 (in which case the true high nibble
417 /// matches the locally-sequential neighbours). For each bucket-slot
418 /// entry the resolver tries the raw descriptor first, then these
419 /// marker-corrected candidates, accepting the first that passes the
420 /// identity gate. Because acceptance requires an exact 32-bit
421 /// `rrv_identifier` match, a wrong candidate cannot be accepted - the
422 /// recovery is heuristic in *what it tries* but never in *what it
423 /// returns*.
424 pub fn enumerate_notes(&self) -> Result<NoteEnumeration, NsfError> {
425 let mut out = NoteEnumeration::default();
426
427 let Some((_, sb)) = self.freshest_superblock()? else {
428 return Ok(out);
429 };
430 let rrv_bucket_size = sb.rrv_bucket_size as usize;
431 if rrv_bucket_size == 0 {
432 return Ok(out);
433 }
434 let raw_fps = self.summary_bucket_raw_fps()?;
435
436 // Collect every RRV bucket to walk: those listed in the BDB plus
437 // the data and non-data RRV buckets named directly in DBINFO.
438 // Deduped by byte offset - on modern ODS the DBINFO buckets are
439 // usually also in the BDB; on older / simpler databases they may
440 // not be, so both sources are needed for complete enumeration.
441 let mut rrv_offsets: std::collections::BTreeSet<u64> = std::collections::BTreeSet::new();
442 if let Some(bdb) = self.bucket_descriptor_block()? {
443 rrv_offsets.extend(bdb.rrv_buckets.iter().map(|d| d.file_offset));
444 }
445 if self.header.data_rrv_bucket_position != 0 {
446 rrv_offsets.insert(u64::from(self.header.data_rrv_bucket_position) * 256);
447 }
448 if self.header.non_data_rrv_bucket_position != 0 {
449 rrv_offsets.insert(u64::from(self.header.non_data_rrv_bucket_position) * 256);
450 }
451
452 for &bucket_off in &rrv_offsets {
453 let start = bucket_off as usize;
454 let Some(slice) = self.bytes.get(start..start.saturating_add(rrv_bucket_size))
455 else {
456 continue;
457 };
458 let Ok((_, iter)) = RrvIter::new(slice) else {
459 continue;
460 };
461 for entry in iter {
462 let resolved = match entry.location {
463 RrvLocation::FilePosition {
464 file_position_pages,
465 } => {
466 out.file_position_total += 1;
467 let off = u64::from(file_position_pages) << 8;
468 self.bytes
469 .get(off as usize..)
470 .and_then(|buf| self.note_if_matches(entry.rrv_identifier, off, buf))
471 }
472 RrvLocation::BucketSlot {
473 bucket_index,
474 slot_index,
475 ..
476 } => {
477 out.bucket_slot_total += 1;
478 self.resolve_validated(&raw_fps, bucket_index, slot_index, entry.rrv_identifier)
479 }
480 };
481 match resolved {
482 Some(note) => out.notes.push(note),
483 None => out.unresolved += 1,
484 }
485 }
486 }
487 Ok(out)
488 }
489
490 /// Parse `buf` as a note header and return a [`ResolvedNote`] only if it
491 /// carries `expected_identifier` (the identity gate).
492 fn note_if_matches(
493 &self,
494 expected_identifier: u32,
495 file_offset: u64,
496 buf: &[u8],
497 ) -> Option<ResolvedNote> {
498 match NoteHeader::parse(buf) {
499 Ok(header) if header.rrv_identifier == expected_identifier => Some(ResolvedNote {
500 rrv_identifier: expected_identifier,
501 file_offset,
502 header,
503 }),
504 _ => None,
505 }
506 }
507
508 /// Resolve a bucket-slot entry to an identity-verified note, trying the
509 /// raw descriptor first then group-marker-corrected candidates. Returns
510 /// `None` only if no candidate yields a note carrying `expected_id`.
511 fn resolve_validated(
512 &self,
513 raw_fps: &[u32],
514 bucket_index: u32,
515 slot_index: u16,
516 expected_id: u32,
517 ) -> Option<ResolvedNote> {
518 let ord = (bucket_index as usize).checked_sub(1)?;
519 let primary = *raw_fps.get(ord)?;
520 // High nibble (bits 16-19) of neighbouring descriptors, used to
521 // repair a bits-16-19 group marker (buckets are locally sequential).
522 let prev_hi = ord
523 .checked_sub(1)
524 .and_then(|i| raw_fps.get(i))
525 .map(|f| f & 0x000F_0000)
526 .unwrap_or(0);
527 let next_hi = raw_fps.get(ord + 1).map(|f| f & 0x000F_0000).unwrap_or(0);
528
529 let candidates = [
530 primary,
531 primary & 0xFFFF_FFF0, // low-nibble group marker
532 (primary & 0xFFF0_FFFF) | prev_hi, // bits-16-19 marker, prev high nibble
533 (primary & 0xFFF0_FFFF) | next_hi, // bits-16-19 marker, next high nibble
534 ];
535
536 for &fp in &candidates {
537 let bucket_off = u64::from(fp) << 8;
538 let Some(buf) = self.bytes.get(bucket_off as usize..) else {
539 continue;
540 };
541 let Ok(bucket) = Bucket::parse(buf) else {
542 continue;
543 };
544 let Ok(slot) = bucket.slot(slot_index) else {
545 continue;
546 };
547 let slot_off = bucket_off + (slot.as_ptr() as usize - buf.as_ptr() as usize) as u64;
548 if let Some(note) = self.note_if_matches(expected_id, slot_off, slot) {
549 return Some(note);
550 }
551 }
552 None
553 }
554
555 /// Return a note's non-summary data object - the separately-stored
556 /// large payload that holds rich-text ($Body / mail bodies), file
557 /// attachments (OBJECT items), and other items too big for the inline
558 /// summary. `None` when the note has no non-summary data.
559 ///
560 /// Location: `non_summary_data_identifier << 8` is the byte offset of
561 /// the object, which opens with a header - signature `0x0010`, then a
562 /// `u32` size and the owning note's `u32` rrv_identifier (both validated
563 /// here) - followed by the payload (a CD-record stream for rich text, or
564 /// object segments for attachments). The returned slice is the whole
565 /// object including that header; record-level decoding (CD records,
566 /// attachment extraction) is a later slice.
567 pub fn non_summary_data(&self, note: &ResolvedNote) -> Option<&'a [u8]> {
568 let id = note.header.non_summary_data_identifier;
569 let size = note.header.non_summary_data_size as usize;
570 if id == 0 || size < 10 {
571 return None;
572 }
573 let off = (u64::from(id) << 8) as usize;
574 let obj = self.bytes.get(off..off.checked_add(size)?)?;
575 // Validate the object header against the note's own metadata so a
576 // wrong / stale identifier never returns unrelated bytes.
577 let hdr_size = u32::from_le_bytes([obj[2], obj[3], obj[4], obj[5]]) as usize;
578 let hdr_rrv = u32::from_le_bytes([obj[6], obj[7], obj[8], obj[9]]);
579 if obj[0] != 0x10 || obj[1] != 0x00 || hdr_size != size || hdr_rrv != note.rrv_identifier {
580 return None;
581 }
582 Some(obj)
583 }
584
585 /// Decode a note's rich-text body and attachments from its non-summary
586 /// data (CD-record stream). Returns `None` when the note has no
587 /// non-summary data or it decodes to nothing. See [`crate::cd`].
588 pub fn note_content(&self, note: &ResolvedNote) -> Option<crate::cd::NoteContent> {
589 let obj = self.non_summary_data(note)?;
590 let content = crate::cd::parse(obj);
591 if content.is_empty() {
592 None
593 } else {
594 Some(content)
595 }
596 }
597
598 /// Parse the items (fields) of a resolved note: each item's name id,
599 /// type/flags, and raw value bytes. See [`crate::item`] for the layout
600 /// and what is / isn't decoded (field-name resolution is a later slice).
601 ///
602 /// The record window is bounded to the note's declared `size` so item
603 /// values cannot read into a neighbouring record.
604 pub fn note_items(&self, note: &ResolvedNote) -> Vec<crate::item::NoteItem<'a>> {
605 let start = note.file_offset as usize;
606 let end = start
607 .saturating_add(note.header.size as usize)
608 .min(self.bytes.len());
609 let Some(record) = self.bytes.get(start..end) else {
610 return Vec::new();
611 };
612 crate::item::parse_items(record, note.header.number_of_note_items)
613 }
614}
615
616/// One note resolved (and identity-verified) by [`Database::enumerate_notes`].
617#[derive(Debug, Clone)]
618pub struct ResolvedNote {
619 /// The RRV identifier the note was reached through (== the note
620 /// header's `rrv_identifier`; the identity gate guarantees equality).
621 pub rrv_identifier: u32,
622 /// Byte offset of the note record within the file.
623 pub file_offset: u64,
624 /// The parsed note header.
625 pub header: NoteHeader,
626}
627
628/// Result of a full-database note enumeration via [`Database::enumerate_notes`].
629#[derive(Debug, Clone, Default)]
630pub struct NoteEnumeration {
631 /// Every identity-verified note, in RRV-walk order.
632 pub notes: Vec<ResolvedNote>,
633 /// RRV entries that could not be resolved to a note carrying the
634 /// expected identifier (failed the identity gate). Reported rather than
635 /// returned as possibly-wrong records.
636 pub unresolved: u64,
637 /// Total bucket-slot RRV entries seen.
638 pub bucket_slot_total: u64,
639 /// Total file-position RRV entries seen.
640 pub file_position_total: u64,
641}