Skip to main content

common/database/
block_offset_index.rs

1//! Sorted index of marker (block or table-anchor) → rope byte range.
2//!
3//! Tracks the byte offset at which each marker starts in the global
4//! rope, plus the rope's total byte length. A marker is either a
5//! real `Block` or a `TableAnchor` — the U+FFFC sentinel that
6//! occupies a line on its own in a parent frame's range to represent
7//! an embedded table (plan §1.6). Each marker extends from its
8//! `byte_start` to the next marker's `byte_start` (or to
9//! `total_bytes` for the last entry).
10//!
11//! Invariants:
12//! - `entries` is sorted by `byte_start` ascending.
13//! - No two entries share the same `byte_start`.
14//! - The last entry's `byte_start ≤ total_bytes`.
15//! - Empty `entries` ⟺ no markers in the document.
16
17use crate::types::EntityId;
18use im::HashMap as ImHashMap;
19
20/// Discriminates real blocks from table-anchor sentinels in the
21/// offset index. The wrapped `EntityId` is the corresponding entity's
22/// id (a Block id or a Table id).
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
24pub enum OffsetMarker {
25    Block(EntityId),
26    TableAnchor(EntityId),
27}
28
29impl OffsetMarker {
30    pub fn as_block(self) -> Option<EntityId> {
31        match self {
32            OffsetMarker::Block(id) => Some(id),
33            _ => None,
34        }
35    }
36
37    pub fn as_table_anchor(self) -> Option<EntityId> {
38        match self {
39            OffsetMarker::TableAnchor(id) => Some(id),
40            _ => None,
41        }
42    }
43
44    pub fn is_block(self) -> bool {
45        matches!(self, OffsetMarker::Block(_))
46    }
47}
48
49#[derive(Debug, Clone, Default, PartialEq, Eq)]
50pub struct BlockOffsetIndex {
51    /// `(marker, byte_start)` pairs sorted by `byte_start` ascending.
52    pub entries: Vec<(OffsetMarker, u32)>,
53
54    /// Total byte length of the rope this index describes. The last
55    /// entry extends from its `byte_start` to this value.
56    pub total_bytes: u32,
57
58    /// O(1)-average lookup from marker to its position in `entries`.
59    /// Maintained eagerly by the `push` / `push_block` / `insert_at` /
60    /// `remove_at` / `clear` methods on this type — direct mutation of
61    /// `entries` from outside leaves this cache stale, so callers must
62    /// go through those methods (or call `rebuild_marker_index` after
63    /// mutating in bulk).
64    ///
65    /// Stored as `im::HashMap` so the snapshot-clone path stays O(1).
66    /// `shift_after` does not move positions, only byte_starts — so
67    /// this map is unaffected by shifts.
68    marker_index: ImHashMap<OffsetMarker, usize>,
69}
70
71impl BlockOffsetIndex {
72    pub fn new() -> Self {
73        Self::default()
74    }
75
76    pub fn len(&self) -> usize {
77        self.entries.len()
78    }
79
80    pub fn is_empty(&self) -> bool {
81        self.entries.is_empty()
82    }
83
84    pub fn total_bytes(&self) -> u32 {
85        self.total_bytes
86    }
87
88    pub fn set_total_bytes(&mut self, total: u32) {
89        self.total_bytes = total;
90    }
91
92    /// Insert a marker at a given byte position. The caller is
93    /// responsible for keeping the `byte_start` ordered relative to
94    /// neighbours — this method does NOT re-sort.
95    pub fn insert_at(&mut self, position: usize, marker: OffsetMarker, byte_start: u32) {
96        self.entries.insert(position, (marker, byte_start));
97        // All markers at positions ≥ `position` shifted up by 1.
98        for (m, p) in self.marker_index.iter_mut() {
99            if *p >= position && *m != marker {
100                *p += 1;
101            }
102        }
103        self.marker_index.insert(marker, position);
104    }
105
106    /// Append a marker at the end (its `byte_start` must be ≥ the last
107    /// entry's `byte_start`).
108    pub fn push(&mut self, marker: OffsetMarker, byte_start: u32) {
109        debug_assert!(
110            self.entries
111                .last()
112                .map(|(_, bs)| byte_start >= *bs)
113                .unwrap_or(true),
114            "push must preserve ordering"
115        );
116        let position = self.entries.len();
117        self.entries.push((marker, byte_start));
118        self.marker_index.insert(marker, position);
119    }
120
121    /// Convenience: register a block by id. Equivalent to
122    /// `push(OffsetMarker::Block(id), byte_start)`.
123    pub fn push_block(&mut self, block_id: EntityId, byte_start: u32) {
124        self.push(OffsetMarker::Block(block_id), byte_start);
125    }
126
127    /// Remove the entry at the given position. Panics if out of bounds.
128    pub fn remove_at(&mut self, position: usize) -> (OffsetMarker, u32) {
129        let removed = self.entries.remove(position);
130        self.marker_index.remove(&removed.0);
131        // All markers at positions > `position` shifted down by 1.
132        for (_, p) in self.marker_index.iter_mut() {
133            if *p > position {
134                *p -= 1;
135            }
136        }
137        removed
138    }
139
140    /// Remove a contiguous range of entries, equivalent to
141    /// `entries.drain(start..=end_inclusive)` plus the matching
142    /// marker_index maintenance. Returns the removed entries.
143    pub fn drain_inclusive(
144        &mut self,
145        start: usize,
146        end_inclusive: usize,
147    ) -> Vec<(OffsetMarker, u32)> {
148        let removed: Vec<_> = self.entries.drain(start..=end_inclusive).collect();
149        for (m, _) in &removed {
150            self.marker_index.remove(m);
151        }
152        let shift = removed.len();
153        // All markers at positions > end_inclusive shift down by `shift`.
154        for (_, p) in self.marker_index.iter_mut() {
155            if *p > end_inclusive {
156                *p -= shift;
157            }
158        }
159        removed
160    }
161
162    /// Drop every entry and reset `total_bytes` to zero. Equivalent to
163    /// `*self = Self::default()` but expressed as a method so callers
164    /// don't need to depend on `Default`.
165    pub fn clear(&mut self) {
166        self.entries.clear();
167        self.marker_index = ImHashMap::new();
168        self.total_bytes = 0;
169    }
170
171    /// Rebuild `marker_index` from `entries`. Use after bulk mutations
172    /// that bypassed the maintenance methods (e.g. raw `entries.push`
173    /// in test setup).
174    pub fn rebuild_marker_index(&mut self) {
175        let mut idx = ImHashMap::new();
176        for (i, (m, _)) in self.entries.iter().enumerate() {
177            idx.insert(*m, i);
178        }
179        self.marker_index = idx;
180    }
181
182    /// Byte range `(start, end)` of a marker. `end` is the next
183    /// marker's `byte_start` (or `total_bytes` for the last entry).
184    /// Returns `None` if the marker is not indexed.
185    ///
186    /// O(1) average via the `marker_index` map.
187    pub fn range_of(&self, marker: OffsetMarker) -> Option<(u32, u32)> {
188        let idx = *self.marker_index.get(&marker)?;
189        let start = self.entries[idx].1;
190        let end = self
191            .entries
192            .get(idx + 1)
193            .map(|(_, bs)| *bs)
194            .unwrap_or(self.total_bytes);
195        Some((start, end))
196    }
197
198    /// Byte range for a block-id specifically. Convenience for the
199    /// common case.
200    pub fn range_of_block(&self, block_id: EntityId) -> Option<(u32, u32)> {
201        self.range_of(OffsetMarker::Block(block_id))
202    }
203
204    /// Like `range_of`, but also reports whether the marker has a
205    /// successor entry. Callers that need to strip the trailing
206    /// inter-block `\n` boundary (e.g. `block_content_via_store`,
207    /// `block_char_length`) use this to distinguish "end == next
208    /// marker's byte_start, with a real `\n` between us" from "end
209    /// == total_bytes, no separator after".
210    ///
211    /// O(1) average via `marker_index`.
212    pub fn range_with_successor(&self, marker: OffsetMarker) -> Option<(u32, u32, bool)> {
213        let idx = *self.marker_index.get(&marker)?;
214        let start = self.entries[idx].1;
215        let has_successor = idx + 1 < self.entries.len();
216        let end = if has_successor {
217            self.entries[idx + 1].1
218        } else {
219            self.total_bytes
220        };
221        Some((start, end, has_successor))
222    }
223
224    /// Position of `marker` in `entries`. O(1) average via the
225    /// `marker_index` cache. Returns `None` if the marker is not
226    /// indexed.
227    pub fn position_of(&self, marker: OffsetMarker) -> Option<usize> {
228        self.marker_index.get(&marker).copied()
229    }
230
231    /// Marker whose byte range covers `byte`. Returns `None` if the
232    /// index is empty or `byte` falls past `total_bytes`.
233    ///
234    /// `byte == total_bytes` is treated as belonging to the last entry
235    /// (this is the cursor-at-end-of-document case).
236    pub fn marker_at_byte(&self, byte: u32) -> Option<OffsetMarker> {
237        if self.entries.is_empty() {
238            return None;
239        }
240        if byte > self.total_bytes {
241            return None;
242        }
243        let idx = match self.entries.binary_search_by_key(&byte, |(_, bs)| *bs) {
244            Ok(i) => i,
245            Err(0) => return None,
246            Err(i) => i - 1,
247        };
248        Some(self.entries[idx].0)
249    }
250
251    /// Block id whose byte range covers `byte`, ignoring table-anchor
252    /// markers. Returns `None` if no block covers the byte.
253    pub fn block_at_byte(&self, byte: u32) -> Option<EntityId> {
254        self.marker_at_byte(byte).and_then(|m| m.as_block())
255    }
256
257    /// Convert an absolute rope byte offset into
258    /// `(marker, byte_in_marker)`. Returns `None` for offsets past the
259    /// end or for an empty index.
260    pub fn byte_to_marker_byte(&self, byte: u32) -> Option<(OffsetMarker, u32)> {
261        let marker = self.marker_at_byte(byte)?;
262        let (start, _) = self.range_of(marker)?;
263        Some((marker, byte - start))
264    }
265
266    /// Block-only variant of `byte_to_marker_byte`.
267    pub fn byte_to_block_byte(&self, byte: u32) -> Option<(EntityId, u32)> {
268        let block_id = self.block_at_byte(byte)?;
269        let (start, _) = self.range_of_block(block_id)?;
270        Some((block_id, byte - start))
271    }
272
273    /// Shift every entry whose `byte_start ≥ threshold` by `delta`
274    /// bytes, and adjust `total_bytes` by `delta`. Used after a rope
275    /// insert (positive delta) or delete (negative delta) to keep the
276    /// index in sync without a full rebuild.
277    pub fn shift_after(&mut self, threshold: u32, delta: i32) {
278        for (_, bs) in self.entries.iter_mut() {
279            if *bs >= threshold {
280                *bs = apply_delta(*bs, delta);
281            }
282        }
283        self.total_bytes = apply_delta(self.total_bytes, delta);
284    }
285}
286
287fn apply_delta(value: u32, delta: i32) -> u32 {
288    if delta >= 0 {
289        value
290            .checked_add(delta as u32)
291            .expect("byte offset overflow")
292    } else {
293        let abs = (-delta) as u32;
294        value
295            .checked_sub(abs)
296            .expect("byte offset would go negative")
297    }
298}