obj_core/integrity.rs
1//! On-disk integrity check (M11 #90).
2//!
3//! [`IntegrityReport`] is the structured output of
4//! [`crate::pager::Pager`]-level walks that verify every documented
5//! invariant of the `.obj` file format. The public driver lives in
6//! the `obj` crate (`obj::Db::integrity_check`); this module hosts
7//! the data types and the obj-core-internal helpers that do the
8//! heavy lifting.
9//!
10//! # Power-of-ten posture
11//!
12//! - **Rule 2.** Every walk is bounded by `page_count` (the page-
13//! reachability sweep), [`crate::btree::MAX_BTREE_DEPTH`] (the
14//! B-tree descent), or [`crate::btree::MAX_RANGE_NODES`] (the
15//! B-tree iteration).
16//! - **Rule 4.** Each entry point is short; the implementation
17//! factors per-tree / per-index helpers.
18//! - **Rule 7.** No `unwrap` / `expect` on the production path. I/O
19//! failures bubble up via `Result::Err`; invariant violations are
20//! recorded into the report's `failures` vector and the walk
21//! continues.
22//! - **Rule 9.** No `dyn`: every helper is generic over
23//! `F: FileBackend` (no trait-object dispatch on the hot path).
24
25#![forbid(unsafe_code)]
26// The integrity walker only ever uses the default `HashSet` hasher;
27// the helpers below take `&HashSet<u64>` / `&HashSet<PageId>` rather
28// than the `HashSet<_, S: BuildHasher>` shape that
29// `clippy::pedantic::implicit_hasher` would prefer. Adding a hasher
30// type parameter to every helper signature is noise for an internal
31// integrity API that callers never instantiate with a non-default
32// hasher.
33#![allow(clippy::implicit_hasher)]
34
35use std::collections::HashSet;
36use std::time::Duration;
37
38use crate::btree::node::{decode_node, NodeKind};
39use crate::btree::{MAX_BTREE_DEPTH, MAX_RANGE_NODES};
40use crate::catalog::{Catalog, CollectionDescriptor, IndexDescriptor, IndexStatus};
41use crate::error::{Error, Result};
42use crate::id::Id;
43use crate::index::IndexKind;
44use crate::pager::checksum::page_trailer_valid;
45use crate::pager::freelist::decode as decode_freelist_page;
46use crate::pager::page::PageId;
47use crate::pager::Pager;
48use crate::platform::FileBackend;
49
50use heapless::Vec as HeaplessVec;
51
52/// Categorical reasons an integrity walk records a failure. Every
53/// variant carries the locus of the problem (page id, collection,
54/// index name, document id) so an operator can root-cause without
55/// re-running the check.
56///
57/// The integrity walker accumulates a `Vec<IntegrityFailure>` and
58/// returns it inside an [`IntegrityReport`] — every failure here is
59/// recoverable in the sense that the check **completed**; the
60/// caller decides whether to repair the file or refuse to open it.
61/// I/O failures during the walk surface as an outer `Result::Err`
62/// instead (the walk could not finish).
63#[derive(Debug, Clone, PartialEq, Eq)]
64#[non_exhaustive]
65pub enum IntegrityFailure {
66 /// A page's CRC32C trailer did not verify. The page-id is the
67 /// locus; page `0` would be a header CRC failure (the file
68 /// header carries its own CRC, not a page trailer — this
69 /// variant is emitted for any page whose trailer fails the
70 /// per-page integrity check).
71 ChecksumMismatch {
72 /// Page where the corruption was detected.
73 page_id: u64,
74 },
75 /// A page in `1..page_count` is not reachable from any root the
76 /// walker recognises (catalog, freelist, any primary or index
77 /// B-tree). Indicates either a leaked page (allocated, never
78 /// linked) or a corrupted root pointer somewhere upstream.
79 OrphanPage {
80 /// The unreferenced page id.
81 page_id: u64,
82 },
83 /// An index B-tree entry references a primary id that does NOT
84 /// exist in the collection's primary B-tree. Indicates the
85 /// index is out-of-sync (likely a partial write that survived
86 /// recovery, or a forensic mutation outside obj).
87 OrphanIndexEntry {
88 /// Owning collection name.
89 collection: String,
90 /// Owning index name.
91 index: String,
92 /// Primary id the index entry pointed at.
93 id: u64,
94 },
95 /// A primary B-tree entry has no matching entry in one of the
96 /// collection's `Active` indexes. The walker checks the
97 /// per-index `(id-suffix-or-value → at-least-one-entry)`
98 /// invariant; an `Each` index with an empty sequence is NOT
99 /// reported (legal: the doc contributes no keys). For
100 /// `Standard` / `Unique` / `Composite` the absence of any
101 /// matching entry is a violation.
102 MissingIndexEntry {
103 /// Owning collection name.
104 collection: String,
105 /// Owning index name.
106 index: String,
107 /// Primary id with no matching index entry.
108 id: u64,
109 },
110 /// A B+tree node failed the sort invariant: a key was followed
111 /// by a strictly-lesser-or-equal key inside the same node.
112 BTreeSortViolation {
113 /// Page where the violation was detected.
114 page_id: u64,
115 },
116 /// A B+tree's child-pointer graph contains a cycle — the same
117 /// page is reachable as the child of two distinct ancestors (or
118 /// of itself). The `tree` field names the containing B-tree by
119 /// a human-readable label (`"catalog"`,
120 /// `"primary:<collection>"`, `"index:<collection>.<index>"`).
121 ///
122 /// Historically this variant also covered leaf `next_sibling`
123 /// chain breakage; the chain check was removed in M11 because
124 /// `CoW` deliberately leaves left-neighbour `next_sibling`
125 /// pointers stale (see `crates/obj-core/src/btree/range.rs`
126 /// lines 16-31). The variant name is preserved for compatibility
127 /// — it now signals only the descent-graph cycle path.
128 BTreeSiblingChainBroken {
129 /// Label identifying which B-tree the violation surfaced on.
130 tree: String,
131 /// Page where the cycle was detected.
132 page_id: u64,
133 },
134 /// A B+tree's depth exceeded
135 /// [`crate::btree::MAX_BTREE_DEPTH`]. Indicates a runaway tree
136 /// shape — by construction the obj writer cannot produce one;
137 /// surfacing here means the file was mutated outside obj.
138 BTreeDepthExceeded {
139 /// Label identifying which B-tree tripped the depth bound.
140 tree: String,
141 /// The bound that was exceeded.
142 limit: usize,
143 },
144 /// A B+tree node's `level` value was inconsistent with its
145 /// kind — leaves must be level 0, internals strictly positive,
146 /// and the level must decrease by exactly 1 when descending
147 /// from an internal node to its child.
148 BTreeLevelInvariantViolated {
149 /// Label identifying the B-tree.
150 tree: String,
151 /// Page where the violation was detected.
152 page_id: u64,
153 },
154 /// A `CollectionDescriptor` carried a `primary_root` or
155 /// `IndexDescriptor.root_page_id` that does not point at a
156 /// page id in `1..page_count` — the catalog references a page
157 /// the file does not contain.
158 DanglingCatalogPointer {
159 /// Owning collection name.
160 collection: String,
161 /// Optional index name when the dangling pointer is on an
162 /// index descriptor; `None` when it is the
163 /// `primary_root` itself.
164 index: Option<String>,
165 /// The out-of-range page id.
166 page_id: u64,
167 },
168 /// The freelist chain was broken — a `next` link pointed at a
169 /// non-freelist page (or a freelist page failed to decode), at
170 /// a page id past `page_count`, or the chain looped.
171 FreelistChainBroken {
172 /// Page id where the broken link was detected.
173 page_id: u64,
174 },
175}
176
177/// Structured result of an [integrity check](crate::integrity).
178///
179/// The public driver returns this inside `Ok(_)` whenever the walk
180/// completes (whether or not it found any failures). An `Err`
181/// outer result means the walk could not finish — an I/O failure,
182/// not a content-level integrity violation.
183#[derive(Debug, Clone)]
184pub struct IntegrityReport {
185 /// Every detected violation, in walker-encounter order.
186 pub failures: Vec<IntegrityFailure>,
187 /// Pages the walker actually inspected (header + every tree
188 /// node + every freelist link). Provides a sanity-check vs
189 /// `page_count`: a check that ran without errors but visited
190 /// noticeably fewer pages than the file holds suggests a
191 /// reachability problem (the [`IntegrityFailure::OrphanPage`]
192 /// failures cover the concrete cases).
193 pub pages_checked: u64,
194 /// Wall-clock duration the walk took. Useful for the operator
195 /// to know whether the check finished within a maintenance
196 /// window.
197 pub elapsed: Duration,
198}
199
200impl IntegrityReport {
201 /// `true` iff no failure was recorded — the report indicates a
202 /// healthy database.
203 #[must_use]
204 pub fn is_ok(&self) -> bool {
205 self.failures.is_empty()
206 }
207}
208
209/// Snapshot of the on-disk shape needed to walk a single B-tree.
210/// Used by the obj layer's wrapper to thread per-tree context
211/// (label, root) into the obj-core helper.
212#[derive(Debug, Clone)]
213pub struct TreeContext {
214 /// Human-readable label used in failure messages (`"catalog"`,
215 /// `"primary:users"`, `"index:users.by_email"`).
216 pub label: String,
217 /// Root page id for the tree.
218 pub root: PageId,
219}
220
221/// Walk a single B-tree from `ctx.root`, recording any per-node
222/// invariant violations into `failures` and inserting every visited
223/// node page-id into `reachable`. Returns the number of pages this
224/// walk inspected.
225///
226/// The walk traverses the tree level by level via an explicit
227/// `Vec<PageId>` queue and a `HashSet<PageId>` already-seen guard
228/// (so a cycle introduced by an out-of-band mutation cannot
229/// infinitely-loop the walker). Bounded by
230/// [`MAX_RANGE_NODES`] per tree (Rule 2).
231///
232/// # Errors
233///
234/// Returns [`Error::Io`] on cache-miss read failure. Never returns
235/// an `Error::Corruption` directly — corruption surfaces as a
236/// [`IntegrityFailure`] entry in `failures`.
237pub fn walk_btree<F: FileBackend>(
238 pager: &mut Pager<F>,
239 ctx: &TreeContext,
240 reachable: &mut HashSet<PageId>,
241 failures: &mut Vec<IntegrityFailure>,
242) -> Result<u64> {
243 let mut queue: Vec<(PageId, u32)> = Vec::new();
244 queue.push((ctx.root, 0));
245 let mut visited: HashSet<PageId> = HashSet::new();
246 let mut pages_walked: u64 = 0;
247 while let Some((pid, depth)) = queue.pop() {
248 if depth as usize > MAX_BTREE_DEPTH {
249 failures.push(IntegrityFailure::BTreeDepthExceeded {
250 tree: ctx.label.clone(),
251 limit: MAX_BTREE_DEPTH,
252 });
253 return Ok(pages_walked);
254 }
255 if !visited.insert(pid) {
256 // Cycle in the descent graph — the same page is
257 // reachable as the child of two distinct ancestors.
258 // A `CoW` B-tree never produces this shape; surfacing
259 // here means the file was mutated outside obj.
260 // Recorded under the `BTreeSiblingChainBroken` variant
261 // (see its docstring — the variant name is retained
262 // for compatibility now that the leaf sibling-chain
263 // check is gone).
264 failures.push(IntegrityFailure::BTreeSiblingChainBroken {
265 tree: ctx.label.clone(),
266 page_id: pid.get(),
267 });
268 continue;
269 }
270 reachable.insert(pid);
271 pages_walked = pages_walked.saturating_add(1);
272 if pages_walked > MAX_RANGE_NODES as u64 {
273 failures.push(IntegrityFailure::BTreeDepthExceeded {
274 tree: ctx.label.clone(),
275 limit: MAX_RANGE_NODES,
276 });
277 return Ok(pages_walked);
278 }
279 let decoded_opt = read_and_validate_node(pager, pid, &ctx.label, failures)?;
280 let Some(decoded) = decoded_opt else {
281 continue;
282 };
283 if matches!(decoded.kind, NodeKind::Internal) {
284 for raw in &decoded.children {
285 if let Some(child) = PageId::new(*raw) {
286 queue.push((child, depth + 1));
287 }
288 }
289 }
290 }
291 // NOTE (M11): we deliberately do NOT walk leaf `next_sibling`
292 // pointers here. The `CoW` B+tree (see
293 // `crates/obj-core/src/btree/range.rs` lines 16-31) leaves
294 // left-neighbour `next_sibling` pointers stale by design — they
295 // point at the OLD page-id of a right neighbour that has since
296 // been rewritten and freed. Cascading those updates would
297 // unfold the rewrite up the tree arbitrarily. `RangeIter` does
298 // not trust `next_sibling`; `integrity_check` doesn't either.
299 // The remaining checks (per-page CRC, sort, depth, level, the
300 // bidirectional primary↔index cross-reference, and the cycle
301 // guard above) cover every corruption mode that matters.
302 Ok(pages_walked)
303}
304
305/// Read a B-tree node page and verify per-page invariants (trailer
306/// CRC + decoder agreement + sort invariant). Returns the decoded
307/// node when validation passes; returns `Ok(None)` and records the
308/// failure when the page is so broken decoding cannot continue
309/// (the caller treats this as "do not descend further" and moves
310/// on to the next queued page).
311fn read_and_validate_node<F: FileBackend>(
312 pager: &mut Pager<F>,
313 pid: PageId,
314 tree_label: &str,
315 failures: &mut Vec<IntegrityFailure>,
316) -> Result<Option<crate::btree::node::DecodedNode>> {
317 let page = match pager.read_page(pid) {
318 Ok(pr) => pr.to_owned_page(),
319 Err(Error::Corruption { page_id }) => {
320 failures.push(IntegrityFailure::ChecksumMismatch { page_id });
321 return Ok(None);
322 }
323 Err(e) => return Err(e),
324 };
325 if !page_trailer_valid(&page) {
326 failures.push(IntegrityFailure::ChecksumMismatch { page_id: pid.get() });
327 return Ok(None);
328 }
329 if let Ok(decoded) = decode_node(page.as_bytes()) {
330 verify_sort_invariant(pid, &decoded, failures);
331 verify_level_invariant(pid, &decoded, tree_label, failures);
332 Ok(Some(decoded))
333 } else {
334 failures.push(IntegrityFailure::ChecksumMismatch { page_id: pid.get() });
335 Ok(None)
336 }
337}
338
339/// Confirm the node's keys are in strictly-ascending order.
340fn verify_sort_invariant(
341 pid: PageId,
342 decoded: &crate::btree::node::DecodedNode,
343 failures: &mut Vec<IntegrityFailure>,
344) {
345 let mut prev: Option<&[u8]> = None;
346 let keys: Box<dyn Iterator<Item = &[u8]> + '_> = match decoded.kind {
347 NodeKind::Leaf => Box::new(decoded.leaves.iter().map(|e| e.key.as_slice())),
348 NodeKind::Internal => Box::new(decoded.internals.iter().map(|e| e.key.as_slice())),
349 };
350 for key in keys {
351 if let Some(p) = prev {
352 if key <= p {
353 failures.push(IntegrityFailure::BTreeSortViolation { page_id: pid.get() });
354 return;
355 }
356 }
357 prev = Some(key);
358 }
359}
360
361/// Confirm leaf-level = 0 and internal-level > 0.
362fn verify_level_invariant(
363 pid: PageId,
364 decoded: &crate::btree::node::DecodedNode,
365 tree_label: &str,
366 failures: &mut Vec<IntegrityFailure>,
367) {
368 let level_ok = match decoded.kind {
369 NodeKind::Leaf => decoded.level == 0,
370 NodeKind::Internal => decoded.level > 0,
371 };
372 if !level_ok {
373 failures.push(IntegrityFailure::BTreeLevelInvariantViolated {
374 tree: tree_label.to_owned(),
375 page_id: pid.get(),
376 });
377 }
378}
379
380/// Walk the freelist chain starting at `head`, inserting every link
381/// page into `reachable` and recording any chain breakage into
382/// `failures`. Returns the number of freelist link pages walked.
383///
384/// Bounded by `page_count` (a freelist chain cannot have more entries
385/// than the file has pages).
386///
387/// # Errors
388///
389/// Returns [`Error::Io`] on cache-miss read failure. Content-level
390/// breakage is recorded into `failures` (the walk does NOT abort
391/// on a broken link; the walker records the failure and stops the
392/// freelist sweep so a subsequent walk does not loop).
393pub fn walk_freelist<F: FileBackend>(
394 pager: &mut Pager<F>,
395 head_raw: u64,
396 page_count: u64,
397 reachable: &mut HashSet<PageId>,
398 failures: &mut Vec<IntegrityFailure>,
399) -> Result<u64> {
400 let Some(mut current_raw) = Some(head_raw) else {
401 return Ok(0);
402 };
403 if current_raw == 0 {
404 return Ok(0);
405 }
406 let mut current = current_raw;
407 let mut steps: u64 = 0;
408 let mut seen: HashSet<PageId> = HashSet::new();
409 while current != 0 {
410 steps = steps.saturating_add(1);
411 if steps > page_count {
412 failures.push(IntegrityFailure::FreelistChainBroken { page_id: current });
413 return Ok(steps);
414 }
415 let Some(pid) = PageId::new(current) else {
416 failures.push(IntegrityFailure::FreelistChainBroken { page_id: current });
417 return Ok(steps);
418 };
419 if pid.get() >= page_count {
420 failures.push(IntegrityFailure::FreelistChainBroken { page_id: pid.get() });
421 return Ok(steps);
422 }
423 if !seen.insert(pid) {
424 failures.push(IntegrityFailure::FreelistChainBroken { page_id: pid.get() });
425 return Ok(steps);
426 }
427 reachable.insert(pid);
428 let page = match pager.read_page(pid) {
429 Ok(pr) => pr.to_owned_page(),
430 Err(Error::Corruption { page_id }) => {
431 failures.push(IntegrityFailure::ChecksumMismatch { page_id });
432 return Ok(steps);
433 }
434 Err(e) => return Err(e),
435 };
436 if !page_trailer_valid(&page) {
437 failures.push(IntegrityFailure::ChecksumMismatch { page_id: pid.get() });
438 return Ok(steps);
439 }
440 let Some(entry) = decode_freelist_page(&page) else {
441 failures.push(IntegrityFailure::FreelistChainBroken { page_id: pid.get() });
442 return Ok(steps);
443 };
444 current_raw = entry.next;
445 current = current_raw;
446 }
447 Ok(steps)
448}
449
450/// Confirm the catalog descriptor's `primary_root` + each `Active`
451/// index's `root_page_id` point at pages within `1..page_count`.
452/// Records `DanglingCatalogPointer` failures for every miss.
453pub fn check_catalog_pointers(
454 name: &str,
455 descriptor: &CollectionDescriptor,
456 page_count: u64,
457 failures: &mut Vec<IntegrityFailure>,
458) {
459 if descriptor.primary_root == 0 || descriptor.primary_root >= page_count {
460 failures.push(IntegrityFailure::DanglingCatalogPointer {
461 collection: name.to_owned(),
462 index: None,
463 page_id: descriptor.primary_root,
464 });
465 }
466 for index in &descriptor.indexes {
467 if index.status != IndexStatus::Active {
468 continue;
469 }
470 if index.root_page_id == 0 || index.root_page_id >= page_count {
471 failures.push(IntegrityFailure::DanglingCatalogPointer {
472 collection: name.to_owned(),
473 index: Some(index.name.clone()),
474 page_id: index.root_page_id,
475 });
476 }
477 }
478}
479
480/// Verify every entry in an `Active` index B-tree (`Standard`,
481/// `Each`, `Composite`: the trailing 8-byte key suffix is the id;
482/// `Unique`: the value is the id) references a primary id that
483/// exists in the primary B-tree.
484///
485/// `primary_ids` is the set of every id present in the collection's
486/// primary tree (pre-computed by the caller's primary walk).
487/// `referenced_ids` (out-param) accumulates the set of primary ids
488/// referenced by AT LEAST ONE index entry — the caller uses it to
489/// run the Primary → Index `MissingIndexEntry` check.
490///
491/// # Errors
492///
493/// Returns [`Error::Io`] on cache-miss read failure. Content-level
494/// breakage is recorded into `failures` and iteration continues.
495pub fn cross_reference_index<F: FileBackend>(
496 pager: &mut Pager<F>,
497 collection: &str,
498 index: &IndexDescriptor,
499 primary_ids: &HashSet<u64>,
500 referenced_ids: &mut HashSet<u64>,
501 failures: &mut Vec<IntegrityFailure>,
502) -> Result<u64> {
503 let Some(root) = PageId::new(index.root_page_id) else {
504 return Ok(0);
505 };
506 let tree = crate::btree::BTree::<F>::open(pager, root)?;
507 let iter = match tree.range(pager, ..) {
508 Ok(it) => it,
509 Err(Error::Corruption { page_id }) => {
510 failures.push(IntegrityFailure::ChecksumMismatch { page_id });
511 return Ok(0);
512 }
513 Err(e) => return Err(e),
514 };
515 let mut scanned: u64 = 0;
516 for step in iter {
517 scanned = scanned
518 .checked_add(1)
519 .ok_or(Error::BTreeInvariantViolated {
520 reason: "index entry count overflow",
521 })?;
522 let (full_key, value) = match step {
523 Ok(kv) => kv,
524 Err(Error::Corruption { page_id }) => {
525 failures.push(IntegrityFailure::ChecksumMismatch { page_id });
526 return Ok(scanned);
527 }
528 Err(e) => return Err(e),
529 };
530 let Some(raw_id) = recover_id_from_entry(&full_key, &value, index.kind) else {
531 failures.push(IntegrityFailure::OrphanIndexEntry {
532 collection: collection.to_owned(),
533 index: index.name.clone(),
534 id: 0,
535 });
536 continue;
537 };
538 if !primary_ids.contains(&raw_id) {
539 failures.push(IntegrityFailure::OrphanIndexEntry {
540 collection: collection.to_owned(),
541 index: index.name.clone(),
542 id: raw_id,
543 });
544 continue;
545 }
546 referenced_ids.insert(raw_id);
547 }
548 Ok(scanned)
549}
550
551/// Recover the `Id` from one index B-tree entry. Mirrors the
552/// `Collection<T>::id_from_index_entry` private helper but is
553/// type-agnostic. For `Unique` indexes the id is the value; for
554/// every other kind it is the trailing 8-byte big-endian suffix of
555/// the key.
556fn recover_id_from_entry(full_key: &[u8], value: &[u8], kind: IndexKind) -> Option<u64> {
557 let bytes: &[u8] = match kind {
558 IndexKind::Unique => value,
559 IndexKind::Standard | IndexKind::Each | IndexKind::Composite => {
560 if full_key.len() < 8 {
561 return None;
562 }
563 &full_key[full_key.len() - 8..]
564 }
565 };
566 let id = Id::from_be_bytes(bytes)?;
567 Some(id.get())
568}
569
570/// Walk every entry in the primary B-tree of `descriptor`,
571/// inserting each id into `primary_ids` and recording any
572/// per-page failures (CRC, sort, depth) along the way. Returns the
573/// number of leaf entries scanned.
574///
575/// # Errors
576///
577/// Returns [`Error::Io`] on cache-miss read failure. Content-level
578/// breakage is recorded into `failures` (via `walk_btree` if the
579/// caller paired the two walks) or surfaced as an early return
580/// from this function on a B-tree decode failure.
581pub fn collect_primary_ids<F: FileBackend>(
582 pager: &mut Pager<F>,
583 descriptor: &CollectionDescriptor,
584 primary_ids: &mut HashSet<u64>,
585) -> Result<u64> {
586 let Some(root) = PageId::new(descriptor.primary_root) else {
587 return Ok(0);
588 };
589 let tree = crate::btree::BTree::<F>::open(pager, root)?;
590 let iter = match tree.range(pager, ..) {
591 Ok(it) => it,
592 Err(Error::Corruption { .. }) => return Ok(0),
593 Err(e) => return Err(e),
594 };
595 let mut count: u64 = 0;
596 for step in iter {
597 count = count.checked_add(1).ok_or(Error::BTreeInvariantViolated {
598 reason: "primary entry count overflow",
599 })?;
600 let (key, _value) = match step {
601 Ok(kv) => kv,
602 Err(Error::Corruption { .. }) => return Ok(count),
603 Err(e) => return Err(e),
604 };
605 if let Some(id) = Id::from_be_bytes(&key) {
606 primary_ids.insert(id.get());
607 }
608 }
609 Ok(count)
610}
611
612/// Build the set of every primary id that has AT LEAST ONE
613/// surviving index entry across the collection's `Active` indexes,
614/// and emit `MissingIndexEntry` failures for any primary id that is
615/// NOT referenced by an index whose kind is `Standard`, `Unique`,
616/// or `Composite`. `Each` indexes are exempted — they legally
617/// reference zero entries when the source sequence is empty.
618pub fn check_primary_to_index(
619 collection: &str,
620 descriptor: &CollectionDescriptor,
621 primary_ids: &HashSet<u64>,
622 per_index_referenced: &[(String, IndexKind, HashSet<u64>)],
623 failures: &mut Vec<IntegrityFailure>,
624) {
625 for (index_name, kind, referenced) in per_index_referenced {
626 if matches!(*kind, IndexKind::Each) {
627 continue;
628 }
629 for id in primary_ids {
630 if !referenced.contains(id) {
631 failures.push(IntegrityFailure::MissingIndexEntry {
632 collection: collection.to_owned(),
633 index: index_name.clone(),
634 id: *id,
635 });
636 }
637 }
638 }
639 debug_assert!(
640 descriptor.indexes.iter().all(|d| {
641 d.status != IndexStatus::Active
642 || per_index_referenced.iter().any(|(n, _, _)| n == &d.name)
643 }),
644 "every Active index must contribute a referenced-ids set",
645 );
646}
647
648/// Compute the fast subset of the integrity walk: validate the
649/// file-header invariants and walk the catalog tree (and only the
650/// catalog tree), without descending into any per-collection
651/// primary or index.
652///
653/// Used by the obj crate's open-time fast check (M11 #91) and by
654/// the obj crate's `Db::integrity_check` for the catalog portion
655/// of the full walk.
656///
657/// # Errors
658///
659/// - [`Error::Io`] on cache-miss read failure.
660///
661/// The returned `IntegrityReport` carries failures encountered
662/// during the catalog walk; the caller decides whether to surface
663/// them as `Err(Error::Corruption { ... })` (the open-time fast
664/// path does) or to merge them into a broader report (the on-
665/// demand full walk does).
666pub fn quick_check<F: FileBackend>(pager: &mut Pager<F>) -> Result<IntegrityReport> {
667 let start = std::time::Instant::now();
668 let mut failures: Vec<IntegrityFailure> = Vec::new();
669 let mut reachable: HashSet<PageId> = HashSet::new();
670 let page_count = pager.page_count();
671 let mut pages_checked: u64 = 1; // page 0 (header) — `Pager::open` already verified it.
672 if let Some(root) = PageId::new(pager.root_catalog()) {
673 if root.get() >= page_count {
674 failures.push(IntegrityFailure::DanglingCatalogPointer {
675 collection: "<catalog>".to_owned(),
676 index: None,
677 page_id: root.get(),
678 });
679 } else {
680 let ctx = TreeContext {
681 label: "catalog".to_owned(),
682 root,
683 };
684 pages_checked = pages_checked.saturating_add(walk_btree(
685 pager,
686 &ctx,
687 &mut reachable,
688 &mut failures,
689 )?);
690 list_catalog_for_pointer_check(pager, page_count, &mut failures)?;
691 }
692 }
693 Ok(IntegrityReport {
694 failures,
695 pages_checked,
696 elapsed: start.elapsed(),
697 })
698}
699
700/// Read every catalog row and check the `primary_root` +
701/// `root_page_id` pointers without walking the per-collection trees.
702/// Used by [`quick_check`].
703fn list_catalog_for_pointer_check<F: FileBackend>(
704 pager: &mut Pager<F>,
705 page_count: u64,
706 failures: &mut Vec<IntegrityFailure>,
707) -> Result<()> {
708 let raw = pager.root_catalog();
709 if PageId::new(raw).is_none() {
710 return Ok(());
711 }
712 let catalog = match Catalog::<F>::open_or_init(pager) {
713 Ok(c) => c,
714 Err(Error::Corruption { .. }) => return Ok(()),
715 Err(e) => return Err(e),
716 };
717 let rows = match catalog.list_collections(pager) {
718 Ok(r) => r,
719 Err(Error::Corruption { .. }) => return Ok(()),
720 Err(e) => return Err(e),
721 };
722 for (name, descriptor) in rows {
723 check_catalog_pointers(&name, &descriptor, page_count, failures);
724 }
725 Ok(())
726}
727
728/// Helper: ensure `path` is the canonical descent stack shape used
729/// by the integrity walk's caller. Re-exported for the obj crate's
730/// per-T extension so it does not have to reimplement the
731/// `HeaplessVec<PageId, MAX_BTREE_DEPTH>` pattern.
732#[must_use]
733pub fn fresh_descent_stack() -> HeaplessVec<PageId, MAX_BTREE_DEPTH> {
734 HeaplessVec::new()
735}