Skip to main content

common/database/
block_offset_index.rs

1//! Sorted index of marker (block or table-anchor) → rope byte range.
2//!
3//! Tracks the byte offset at which each marker starts in the global
4//! rope, plus the rope's total byte length. A marker is either a
5//! real `Block` or a `TableAnchor` — the U+FFFC sentinel that
6//! occupies a line on its own in a parent frame's range to represent
7//! an embedded table (plan §1.6). Each marker extends from its
8//! `byte_start` to the next marker's `byte_start` (or to
9//! `total_bytes` for the last entry).
10//!
11//! Invariants:
12//! - `entries` is sorted by `byte_start` ascending.
13//! - No two entries share the same `byte_start`.
14//! - The last entry's `byte_start ≤ total_bytes`.
15//! - Empty `entries` ⟺ no markers in the document.
16
17use crate::types::EntityId;
18use im::HashMap as ImHashMap;
19use std::sync::Arc;
20
21/// Discriminates real blocks from table-anchor sentinels in the
22/// offset index. The wrapped `EntityId` is the corresponding entity's
23/// id (a Block id or a Table id).
24#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
25pub enum OffsetMarker {
26    Block(EntityId),
27    TableAnchor(EntityId),
28}
29
30impl OffsetMarker {
31    pub fn as_block(self) -> Option<EntityId> {
32        match self {
33            OffsetMarker::Block(id) => Some(id),
34            _ => None,
35        }
36    }
37
38    pub fn as_table_anchor(self) -> Option<EntityId> {
39        match self {
40            OffsetMarker::TableAnchor(id) => Some(id),
41            _ => None,
42        }
43    }
44
45    pub fn is_block(self) -> bool {
46        matches!(self, OffsetMarker::Block(_))
47    }
48}
49
50#[derive(Debug, Clone, Default, PartialEq, Eq)]
51pub struct BlockOffsetIndex {
52    /// `(marker, byte_start)` pairs sorted by `byte_start` ascending.
53    ///
54    /// Wrapped in `Arc` so the per-edit snapshot path (which clones
55    /// `BlockOffsetIndex` as part of `Store::snapshot`) does an O(1)
56    /// pointer bump instead of an O(N) memcpy of the underlying Vec.
57    /// Mutators in this `impl` block go through `Arc::make_mut`, which
58    /// deep-clones the Vec only on the first write after the Arc has
59    /// been shared (i.e., after a snapshot was taken). External
60    /// callers see `Arc<Vec<...>>` which derefs transparently to
61    /// `&[...]` for reads.
62    pub entries: Arc<Vec<(OffsetMarker, u32)>>,
63
64    /// Total byte length of the rope this index describes. The last
65    /// entry extends from its `byte_start` to this value.
66    pub total_bytes: u32,
67
68    /// O(1)-average lookup from marker to its position in `entries`.
69    /// Maintained eagerly by the `push` / `push_block` / `insert_at` /
70    /// `remove_at` / `clear` methods on this type — direct mutation of
71    /// `entries` from outside leaves this cache stale, so callers must
72    /// go through those methods (or call `rebuild_marker_index` after
73    /// mutating in bulk).
74    ///
75    /// Stored as `im::HashMap` so the snapshot-clone path stays O(1).
76    /// `shift_after` does not move positions, only byte_starts — so
77    /// this map is unaffected by shifts.
78    marker_index: ImHashMap<OffsetMarker, usize>,
79
80    /// Cached count of `OffsetMarker::TableAnchor` entries. Maintained
81    /// by the same mutator methods that maintain `marker_index`. Used
82    /// by `rope_helpers::rope_positions_match_flow` to gate the
83    /// per-edit position-refresh loop in O(1) instead of an O(N) walk
84    /// over `entries`.
85    table_anchor_count: usize,
86}
87
88impl BlockOffsetIndex {
89    pub fn new() -> Self {
90        Self::default()
91    }
92
93    pub fn len(&self) -> usize {
94        self.entries.len()
95    }
96
97    pub fn is_empty(&self) -> bool {
98        self.entries.is_empty()
99    }
100
101    pub fn total_bytes(&self) -> u32 {
102        self.total_bytes
103    }
104
105    pub fn set_total_bytes(&mut self, total: u32) {
106        self.total_bytes = total;
107    }
108
109    /// Number of `OffsetMarker::TableAnchor` entries currently indexed.
110    /// O(1) via a maintained counter. Used to gate flow-position
111    /// derivation: rope-derived positions match `Block.document_position`
112    /// only when this is zero.
113    pub fn table_anchor_count(&self) -> usize {
114        self.table_anchor_count
115    }
116
117    /// Insert a marker at a given byte position. The caller is
118    /// responsible for keeping the `byte_start` ordered relative to
119    /// neighbours — this method does NOT re-sort.
120    pub fn insert_at(&mut self, position: usize, marker: OffsetMarker, byte_start: u32) {
121        Arc::make_mut(&mut self.entries).insert(position, (marker, byte_start));
122        // All markers at positions ≥ `position` shifted up by 1.
123        for (m, p) in self.marker_index.iter_mut() {
124            if *p >= position && *m != marker {
125                *p += 1;
126            }
127        }
128        self.marker_index.insert(marker, position);
129        if matches!(marker, OffsetMarker::TableAnchor(_)) {
130            self.table_anchor_count += 1;
131        }
132    }
133
134    /// Append a marker at the end (its `byte_start` must be ≥ the last
135    /// entry's `byte_start`).
136    pub fn push(&mut self, marker: OffsetMarker, byte_start: u32) {
137        debug_assert!(
138            self.entries
139                .last()
140                .map(|(_, bs)| byte_start >= *bs)
141                .unwrap_or(true),
142            "push must preserve ordering"
143        );
144        let position = self.entries.len();
145        Arc::make_mut(&mut self.entries).push((marker, byte_start));
146        self.marker_index.insert(marker, position);
147        if matches!(marker, OffsetMarker::TableAnchor(_)) {
148            self.table_anchor_count += 1;
149        }
150    }
151
152    /// Convenience: register a block by id. Equivalent to
153    /// `push(OffsetMarker::Block(id), byte_start)`.
154    pub fn push_block(&mut self, block_id: EntityId, byte_start: u32) {
155        self.push(OffsetMarker::Block(block_id), byte_start);
156    }
157
158    /// Remove the entry at the given position. Panics if out of bounds.
159    pub fn remove_at(&mut self, position: usize) -> (OffsetMarker, u32) {
160        let removed = Arc::make_mut(&mut self.entries).remove(position);
161        self.marker_index.remove(&removed.0);
162        // All markers at positions > `position` shifted down by 1.
163        for (_, p) in self.marker_index.iter_mut() {
164            if *p > position {
165                *p -= 1;
166            }
167        }
168        if matches!(removed.0, OffsetMarker::TableAnchor(_)) {
169            self.table_anchor_count = self.table_anchor_count.saturating_sub(1);
170        }
171        removed
172    }
173
174    /// Remove a contiguous range of entries, equivalent to
175    /// `entries.drain(start..=end_inclusive)` plus the matching
176    /// marker_index maintenance. Returns the removed entries.
177    pub fn drain_inclusive(
178        &mut self,
179        start: usize,
180        end_inclusive: usize,
181    ) -> Vec<(OffsetMarker, u32)> {
182        let removed: Vec<_> = Arc::make_mut(&mut self.entries)
183            .drain(start..=end_inclusive)
184            .collect();
185        let removed_anchors = removed
186            .iter()
187            .filter(|(m, _)| matches!(m, OffsetMarker::TableAnchor(_)))
188            .count();
189        self.table_anchor_count = self.table_anchor_count.saturating_sub(removed_anchors);
190        for (m, _) in &removed {
191            self.marker_index.remove(m);
192        }
193        let shift = removed.len();
194        // All markers at positions > end_inclusive shift down by `shift`.
195        for (_, p) in self.marker_index.iter_mut() {
196            if *p > end_inclusive {
197                *p -= shift;
198            }
199        }
200        removed
201    }
202
203    /// Drop every entry and reset `total_bytes` to zero. Equivalent to
204    /// `*self = Self::default()` but expressed as a method so callers
205    /// don't need to depend on `Default`.
206    pub fn clear(&mut self) {
207        Arc::make_mut(&mut self.entries).clear();
208        self.marker_index = ImHashMap::new();
209        self.total_bytes = 0;
210        self.table_anchor_count = 0;
211    }
212
213    /// Rebuild `marker_index` and `table_anchor_count` from `entries`.
214    /// Use after bulk mutations that bypassed the maintenance methods
215    /// (e.g. raw `entries.push` in test setup).
216    pub fn rebuild_marker_index(&mut self) {
217        let mut idx = ImHashMap::new();
218        let mut anchors = 0usize;
219        for (i, (m, _)) in self.entries.iter().enumerate() {
220            idx.insert(*m, i);
221            if matches!(m, OffsetMarker::TableAnchor(_)) {
222                anchors += 1;
223            }
224        }
225        self.marker_index = idx;
226        self.table_anchor_count = anchors;
227    }
228
229    /// Byte range `(start, end)` of a marker. `end` is the next
230    /// marker's `byte_start` (or `total_bytes` for the last entry).
231    /// Returns `None` if the marker is not indexed.
232    ///
233    /// O(1) average via the `marker_index` map.
234    pub fn range_of(&self, marker: OffsetMarker) -> Option<(u32, u32)> {
235        let idx = *self.marker_index.get(&marker)?;
236        let start = self.entries[idx].1;
237        let end = self
238            .entries
239            .get(idx + 1)
240            .map(|(_, bs)| *bs)
241            .unwrap_or(self.total_bytes);
242        Some((start, end))
243    }
244
245    /// Byte range for a block-id specifically. Convenience for the
246    /// common case.
247    pub fn range_of_block(&self, block_id: EntityId) -> Option<(u32, u32)> {
248        self.range_of(OffsetMarker::Block(block_id))
249    }
250
251    /// Like `range_of`, but also reports whether the marker has a
252    /// successor entry. Callers that need to strip the trailing
253    /// inter-block `\n` boundary (e.g. `block_content_via_store`,
254    /// `block_char_length`) use this to distinguish "end == next
255    /// marker's byte_start, with a real `\n` between us" from "end
256    /// == total_bytes, no separator after".
257    ///
258    /// O(1) average via `marker_index`.
259    pub fn range_with_successor(&self, marker: OffsetMarker) -> Option<(u32, u32, bool)> {
260        let idx = *self.marker_index.get(&marker)?;
261        let start = self.entries[idx].1;
262        let has_successor = idx + 1 < self.entries.len();
263        let end = if has_successor {
264            self.entries[idx + 1].1
265        } else {
266            self.total_bytes
267        };
268        Some((start, end, has_successor))
269    }
270
271    /// Position of `marker` in `entries`. O(1) average via the
272    /// `marker_index` cache. Returns `None` if the marker is not
273    /// indexed.
274    pub fn position_of(&self, marker: OffsetMarker) -> Option<usize> {
275        self.marker_index.get(&marker).copied()
276    }
277
278    /// Marker whose byte range covers `byte`. Returns `None` if the
279    /// index is empty or `byte` falls past `total_bytes`.
280    ///
281    /// `byte == total_bytes` is treated as belonging to the last entry
282    /// (this is the cursor-at-end-of-document case).
283    pub fn marker_at_byte(&self, byte: u32) -> Option<OffsetMarker> {
284        if self.entries.is_empty() {
285            return None;
286        }
287        if byte > self.total_bytes {
288            return None;
289        }
290        let idx = match self.entries.binary_search_by_key(&byte, |(_, bs)| *bs) {
291            Ok(i) => i,
292            Err(0) => return None,
293            Err(i) => i - 1,
294        };
295        Some(self.entries[idx].0)
296    }
297
298    /// Block id whose byte range covers `byte`, ignoring table-anchor
299    /// markers. Returns `None` if no block covers the byte.
300    pub fn block_at_byte(&self, byte: u32) -> Option<EntityId> {
301        self.marker_at_byte(byte).and_then(|m| m.as_block())
302    }
303
304    /// Convert an absolute rope byte offset into
305    /// `(marker, byte_in_marker)`. Returns `None` for offsets past the
306    /// end or for an empty index.
307    pub fn byte_to_marker_byte(&self, byte: u32) -> Option<(OffsetMarker, u32)> {
308        let marker = self.marker_at_byte(byte)?;
309        let (start, _) = self.range_of(marker)?;
310        Some((marker, byte - start))
311    }
312
313    /// Block-only variant of `byte_to_marker_byte`.
314    pub fn byte_to_block_byte(&self, byte: u32) -> Option<(EntityId, u32)> {
315        let block_id = self.block_at_byte(byte)?;
316        let (start, _) = self.range_of_block(block_id)?;
317        Some((block_id, byte - start))
318    }
319
320    /// Shift every entry whose `byte_start ≥ threshold` by `delta`
321    /// bytes, and adjust `total_bytes` by `delta`. Used after a rope
322    /// insert (positive delta) or delete (negative delta) to keep the
323    /// index in sync without a full rebuild.
324    ///
325    /// `entries` is sorted by `byte_start`, so the affected suffix is
326    /// located via `partition_point` (O(log n)) and only that suffix
327    /// is walked.
328    pub fn shift_after(&mut self, threshold: u32, delta: i32) {
329        let start = self.entries.partition_point(|(_, bs)| *bs < threshold);
330        if start < self.entries.len() {
331            for (_, bs) in Arc::make_mut(&mut self.entries)[start..].iter_mut() {
332                *bs = apply_delta(*bs, delta);
333            }
334        }
335        self.total_bytes = apply_delta(self.total_bytes, delta);
336    }
337}
338
339fn apply_delta(value: u32, delta: i32) -> u32 {
340    if delta >= 0 {
341        value
342            .checked_add(delta as u32)
343            .expect("byte offset overflow")
344    } else {
345        let abs = (-delta) as u32;
346        value
347            .checked_sub(abs)
348            .expect("byte offset would go negative")
349    }
350}