// reddb_server/storage/query/executors/bitmap_scan.rs
//! Bitmap heap scan — Phase 5 P6 consumer of `TidBitmap`.
//!
//! Implements the second half of the PG bitmap-index-scan
//! pipeline: given a `TidBitmap` produced by AND/OR-ing
//! per-index bitmaps, walk the target heap pages in sorted
//! order and fetch the rows corresponding to set bits.
//!
//! The win over a plain index scan is **sequential page
//! access**: bitmap entries are sorted by TID, so successive
//! fetches go to adjacent pages, giving the OS and buffer
//! pool a prefetch-friendly stream instead of random seeks.
//! For queries touching >1% of a large table the difference
//! is 5-20× on spinning disks and ~2-3× on SSDs.
//!
//! Mirrors PG's `nodeBitmapHeapscan.c` modulo features we
//! don't have:
//!
//! - **Lossy bitmap entries**: PG's tidbitmap can spill to
//!   page-level granularity when memory pressure mounts.
//!   `TidBitmap` doesn't, so the bitmap heap scan here
//!   always processes tuple-level entries.
//! - **Prefetch hints**: PG calls `PrefetchBuffer` for the
//!   next few pages while the current page is being
//!   processed. We rely on the OS readahead for now.
//! - **Parallel bitmap heap scans**: single-producer for now.
//!
//! This module is **not yet wired** into the canonical plan.
//! A `BitmapHeapScan` logical plan node and its executor
//! arm plug into `query_exec/table.rs` when the planner
//! learns to emit bitmap paths.

use crate::storage::index::tid_bitmap::TidBitmap;

34/// Callback the bitmap scan uses to fetch a row by its TID.
35/// The caller (typically the runtime executor) provides this
36/// when invoking the scan because the row shape depends on
37/// the target collection.
38pub trait RowFetcher {
39    type Row;
40    type Error;
41    /// Load the row at `(page, row_within_page)`. Returns
42    /// `None` when the slot is empty (tombstone / deleted)
43    /// so the scan can skip it without raising an error.
44    fn fetch(&self, page: u32, row_within_page: u32) -> Result<Option<Self::Row>, Self::Error>;
45}
46
47/// Execute a bitmap heap scan over `bitmap`, invoking `fetcher`
48/// for each surviving TID in sorted order. Returns the
49/// materialised rows in the same TID order.
50///
51/// `rows_per_page` is the table's fixed row density — the
52/// planner supplies this from schema metadata. For reddb's
53/// default 8 KB pages with ~64-byte rows it's ~128.
54///
55/// Three-phase algorithm:
56///
57/// 1. **Group by page**: `bitmap.group_by_page(rows_per_page)`
58///    returns `(page_id, Vec<row_within_page>)` sorted by
59///    page. This turns the iteration into a sequential-read-
60///    friendly pattern.
61/// 2. **Fetch each page's rows**: for each page group, the
62///    fetcher is called once per target row within that page.
63///    The fetcher is responsible for caching the page's
64///    buffer so repeated fetches within the same page don't
65///    re-read the disk.
66/// 3. **Materialise the output**: rows from all pages flow
67///    into a single result Vec in their natural ascending
68///    TID order.
69pub fn execute_bitmap_scan<F: RowFetcher>(
70    bitmap: &TidBitmap,
71    fetcher: &F,
72    rows_per_page: u32,
73) -> Result<Vec<F::Row>, F::Error> {
74    let groups = bitmap.group_by_page(rows_per_page);
75    // Pre-allocate capacity for the expected output size —
76    // bitmap::len() gives exact row count since the bitmap
77    // is not lossy.
78    let mut out: Vec<F::Row> = Vec::with_capacity(bitmap.len() as usize);
79    for (page_id, rows_within) in groups {
80        for row in rows_within {
81            if let Some(row) = fetcher.fetch(page_id, row)? {
82                out.push(row);
83            }
84        }
85    }
86    Ok(out)
87}
88
89/// Summary statistics the bitmap scan returns alongside its
90/// output. Useful for `EXPLAIN ANALYZE`-style diagnostics and
91/// for the planner's feedback loop when adjusting cost
92/// estimates based on actual selectivity.
93#[derive(Debug, Clone, Default)]
94pub struct BitmapScanStats {
95    /// Total candidate TIDs the bitmap contained before
96    /// fetching.
97    pub candidate_tids: u64,
98    /// Rows actually returned (candidates minus tombstones).
99    pub rows_returned: u64,
100    /// Distinct pages touched during the scan. A good proxy
101    /// for physical I/O: n pages × buffer-pool-hit ratio.
102    pub pages_touched: u64,
103}
104
105impl BitmapScanStats {
106    /// Returns the fraction of candidates that survived the
107    /// tombstone check. Values near 1.0 mean the bitmap is
108    /// well-pruned; values near 0.0 mean the index was stale
109    /// and VACUUM should run.
110    pub fn survival_ratio(&self) -> f64 {
111        if self.candidate_tids == 0 {
112            return 0.0;
113        }
114        self.rows_returned as f64 / self.candidate_tids as f64
115    }
116}
117
118/// Variant of `execute_bitmap_scan` that also fills a
119/// `BitmapScanStats` struct alongside the row output. Used
120/// by `EXPLAIN ANALYZE` paths and by the runtime's
121/// cardinality feedback loop.
122pub fn execute_bitmap_scan_with_stats<F: RowFetcher>(
123    bitmap: &TidBitmap,
124    fetcher: &F,
125    rows_per_page: u32,
126) -> Result<(Vec<F::Row>, BitmapScanStats), F::Error> {
127    let groups = bitmap.group_by_page(rows_per_page);
128    let mut stats = BitmapScanStats {
129        candidate_tids: bitmap.len(),
130        rows_returned: 0,
131        pages_touched: groups.len() as u64,
132    };
133    let mut out: Vec<F::Row> = Vec::with_capacity(bitmap.len() as usize);
134    for (page_id, rows_within) in groups {
135        for row in rows_within {
136            if let Some(row) = fetcher.fetch(page_id, row)? {
137                out.push(row);
138                stats.rows_returned += 1;
139            }
140        }
141    }
142    Ok((out, stats))
143}
144
145/// Phase 3.6 wiring entry point. Combines a list of per-index
146/// bitmaps via the requested boolean op and runs a single heap
147/// fetch over the result. The planner uses this when WHERE has
148/// multi-index AND/OR predicates.
149///
150/// `combine` controls the merge: `BitmapCombine::And` produces
151/// the intersection (rows matching every index), `Or` produces
152/// the union (rows matching any index).
153///
154/// Returns the scan rows in TID-sorted order. Caller can wrap
155/// in `execute_bitmap_scan_with_stats` instead if it wants the
156/// diagnostic counters.
157pub fn execute_combined_bitmap_scan<F: RowFetcher>(
158    bitmaps: Vec<TidBitmap>,
159    combine: BitmapCombine,
160    fetcher: &F,
161    rows_per_page: u32,
162) -> Result<Vec<F::Row>, F::Error> {
163    let merged = match combine {
164        BitmapCombine::And => crate::storage::index::tid_bitmap::intersect_all(bitmaps),
165        BitmapCombine::Or => crate::storage::index::tid_bitmap::union_all(bitmaps),
166    };
167    execute_bitmap_scan(&merged, fetcher, rows_per_page)
168}
169
170/// Boolean combinator passed into `execute_combined_bitmap_scan`.
171#[derive(Debug, Clone, Copy, PartialEq, Eq)]
172pub enum BitmapCombine {
173    And,
174    Or,
175}