common/database/block_offset_index.rs
1//! Sorted index of marker (block or table-anchor) → rope byte range.
2//!
3//! Tracks the byte offset at which each marker starts in the global
4//! rope, plus the rope's total byte length. A marker is either a
5//! real `Block` or a `TableAnchor` — the U+FFFC sentinel that
6//! occupies a line on its own in a parent frame's range to represent
7//! an embedded table (plan §1.6). Each marker extends from its
8//! `byte_start` to the next marker's `byte_start` (or to
9//! `total_bytes` for the last entry).
10//!
11//! Invariants:
12//! - `entries` is sorted by `byte_start` ascending.
13//! - No two entries share the same `byte_start`.
14//! - The last entry's `byte_start ≤ total_bytes`.
15//! - Empty `entries` ⟺ no markers in the document.
16
17use crate::types::EntityId;
18use im::HashMap as ImHashMap;
19use std::sync::Arc;
20
21/// Discriminates real blocks from table-anchor sentinels in the
22/// offset index. The wrapped `EntityId` is the corresponding entity's
23/// id (a Block id or a Table id).
24#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
25pub enum OffsetMarker {
26 Block(EntityId),
27 TableAnchor(EntityId),
28}
29
30impl OffsetMarker {
31 pub fn as_block(self) -> Option<EntityId> {
32 match self {
33 OffsetMarker::Block(id) => Some(id),
34 _ => None,
35 }
36 }
37
38 pub fn as_table_anchor(self) -> Option<EntityId> {
39 match self {
40 OffsetMarker::TableAnchor(id) => Some(id),
41 _ => None,
42 }
43 }
44
45 pub fn is_block(self) -> bool {
46 matches!(self, OffsetMarker::Block(_))
47 }
48}
49
50#[derive(Debug, Clone, Default, PartialEq, Eq)]
51pub struct BlockOffsetIndex {
52 /// `(marker, byte_start)` pairs sorted by `byte_start` ascending.
53 ///
54 /// Wrapped in `Arc` so the per-edit snapshot path (which clones
55 /// `BlockOffsetIndex` as part of `Store::snapshot`) does an O(1)
56 /// pointer bump instead of an O(N) memcpy of the underlying Vec.
57 /// Mutators in this `impl` block go through `Arc::make_mut`, which
58 /// deep-clones the Vec only on the first write after the Arc has
59 /// been shared (i.e., after a snapshot was taken). External
60 /// callers see `Arc<Vec<...>>` which derefs transparently to
61 /// `&[...]` for reads.
62 pub entries: Arc<Vec<(OffsetMarker, u32)>>,
63
64 /// Total byte length of the rope this index describes. The last
65 /// entry extends from its `byte_start` to this value.
66 pub total_bytes: u32,
67
68 /// O(1)-average lookup from marker to its position in `entries`.
69 /// Maintained eagerly by the `push` / `push_block` / `insert_at` /
70 /// `remove_at` / `clear` methods on this type — direct mutation of
71 /// `entries` from outside leaves this cache stale, so callers must
72 /// go through those methods (or call `rebuild_marker_index` after
73 /// mutating in bulk).
74 ///
75 /// Stored as `im::HashMap` so the snapshot-clone path stays O(1).
76 /// `shift_after` does not move positions, only byte_starts — so
77 /// this map is unaffected by shifts.
78 marker_index: ImHashMap<OffsetMarker, usize>,
79
80 /// Cached count of `OffsetMarker::TableAnchor` entries. Maintained
81 /// by the same mutator methods that maintain `marker_index`. Used
82 /// by `rope_helpers::rope_positions_match_flow` to gate the
83 /// per-edit position-refresh loop in O(1) instead of an O(N) walk
84 /// over `entries`.
85 table_anchor_count: usize,
86}
87
88impl BlockOffsetIndex {
89 pub fn new() -> Self {
90 Self::default()
91 }
92
93 pub fn len(&self) -> usize {
94 self.entries.len()
95 }
96
97 pub fn is_empty(&self) -> bool {
98 self.entries.is_empty()
99 }
100
101 pub fn total_bytes(&self) -> u32 {
102 self.total_bytes
103 }
104
105 pub fn set_total_bytes(&mut self, total: u32) {
106 self.total_bytes = total;
107 }
108
109 /// Number of `OffsetMarker::TableAnchor` entries currently indexed.
110 /// O(1) via a maintained counter. Used to gate flow-position
111 /// derivation: rope-derived positions match `Block.document_position`
112 /// only when this is zero.
113 pub fn table_anchor_count(&self) -> usize {
114 self.table_anchor_count
115 }
116
117 /// Insert a marker at a given byte position. The caller is
118 /// responsible for keeping the `byte_start` ordered relative to
119 /// neighbours — this method does NOT re-sort.
120 pub fn insert_at(&mut self, position: usize, marker: OffsetMarker, byte_start: u32) {
121 Arc::make_mut(&mut self.entries).insert(position, (marker, byte_start));
122 // All markers at positions ≥ `position` shifted up by 1.
123 for (m, p) in self.marker_index.iter_mut() {
124 if *p >= position && *m != marker {
125 *p += 1;
126 }
127 }
128 self.marker_index.insert(marker, position);
129 if matches!(marker, OffsetMarker::TableAnchor(_)) {
130 self.table_anchor_count += 1;
131 }
132 }
133
134 /// Append a marker at the end (its `byte_start` must be ≥ the last
135 /// entry's `byte_start`).
136 pub fn push(&mut self, marker: OffsetMarker, byte_start: u32) {
137 debug_assert!(
138 self.entries
139 .last()
140 .map(|(_, bs)| byte_start >= *bs)
141 .unwrap_or(true),
142 "push must preserve ordering"
143 );
144 let position = self.entries.len();
145 Arc::make_mut(&mut self.entries).push((marker, byte_start));
146 self.marker_index.insert(marker, position);
147 if matches!(marker, OffsetMarker::TableAnchor(_)) {
148 self.table_anchor_count += 1;
149 }
150 }
151
152 /// Convenience: register a block by id. Equivalent to
153 /// `push(OffsetMarker::Block(id), byte_start)`.
154 pub fn push_block(&mut self, block_id: EntityId, byte_start: u32) {
155 self.push(OffsetMarker::Block(block_id), byte_start);
156 }
157
158 /// Remove the entry at the given position. Panics if out of bounds.
159 pub fn remove_at(&mut self, position: usize) -> (OffsetMarker, u32) {
160 let removed = Arc::make_mut(&mut self.entries).remove(position);
161 self.marker_index.remove(&removed.0);
162 // All markers at positions > `position` shifted down by 1.
163 for (_, p) in self.marker_index.iter_mut() {
164 if *p > position {
165 *p -= 1;
166 }
167 }
168 if matches!(removed.0, OffsetMarker::TableAnchor(_)) {
169 self.table_anchor_count = self.table_anchor_count.saturating_sub(1);
170 }
171 removed
172 }
173
174 /// Remove a contiguous range of entries, equivalent to
175 /// `entries.drain(start..=end_inclusive)` plus the matching
176 /// marker_index maintenance. Returns the removed entries.
177 pub fn drain_inclusive(
178 &mut self,
179 start: usize,
180 end_inclusive: usize,
181 ) -> Vec<(OffsetMarker, u32)> {
182 let removed: Vec<_> = Arc::make_mut(&mut self.entries)
183 .drain(start..=end_inclusive)
184 .collect();
185 let removed_anchors = removed
186 .iter()
187 .filter(|(m, _)| matches!(m, OffsetMarker::TableAnchor(_)))
188 .count();
189 self.table_anchor_count = self.table_anchor_count.saturating_sub(removed_anchors);
190 for (m, _) in &removed {
191 self.marker_index.remove(m);
192 }
193 let shift = removed.len();
194 // All markers at positions > end_inclusive shift down by `shift`.
195 for (_, p) in self.marker_index.iter_mut() {
196 if *p > end_inclusive {
197 *p -= shift;
198 }
199 }
200 removed
201 }
202
203 /// Drop every entry and reset `total_bytes` to zero. Equivalent to
204 /// `*self = Self::default()` but expressed as a method so callers
205 /// don't need to depend on `Default`.
206 pub fn clear(&mut self) {
207 Arc::make_mut(&mut self.entries).clear();
208 self.marker_index = ImHashMap::new();
209 self.total_bytes = 0;
210 self.table_anchor_count = 0;
211 }
212
213 /// Rebuild `marker_index` and `table_anchor_count` from `entries`.
214 /// Use after bulk mutations that bypassed the maintenance methods
215 /// (e.g. raw `entries.push` in test setup).
216 pub fn rebuild_marker_index(&mut self) {
217 let mut idx = ImHashMap::new();
218 let mut anchors = 0usize;
219 for (i, (m, _)) in self.entries.iter().enumerate() {
220 idx.insert(*m, i);
221 if matches!(m, OffsetMarker::TableAnchor(_)) {
222 anchors += 1;
223 }
224 }
225 self.marker_index = idx;
226 self.table_anchor_count = anchors;
227 }
228
229 /// Byte range `(start, end)` of a marker. `end` is the next
230 /// marker's `byte_start` (or `total_bytes` for the last entry).
231 /// Returns `None` if the marker is not indexed.
232 ///
233 /// O(1) average via the `marker_index` map.
234 pub fn range_of(&self, marker: OffsetMarker) -> Option<(u32, u32)> {
235 let idx = *self.marker_index.get(&marker)?;
236 let start = self.entries[idx].1;
237 let end = self
238 .entries
239 .get(idx + 1)
240 .map(|(_, bs)| *bs)
241 .unwrap_or(self.total_bytes);
242 Some((start, end))
243 }
244
245 /// Byte range for a block-id specifically. Convenience for the
246 /// common case.
247 pub fn range_of_block(&self, block_id: EntityId) -> Option<(u32, u32)> {
248 self.range_of(OffsetMarker::Block(block_id))
249 }
250
251 /// Like `range_of`, but also reports whether the marker has a
252 /// successor entry. Callers that need to strip the trailing
253 /// inter-block `\n` boundary (e.g. `block_content_via_store`,
254 /// `block_char_length`) use this to distinguish "end == next
255 /// marker's byte_start, with a real `\n` between us" from "end
256 /// == total_bytes, no separator after".
257 ///
258 /// O(1) average via `marker_index`.
259 pub fn range_with_successor(&self, marker: OffsetMarker) -> Option<(u32, u32, bool)> {
260 let idx = *self.marker_index.get(&marker)?;
261 let start = self.entries[idx].1;
262 let has_successor = idx + 1 < self.entries.len();
263 let end = if has_successor {
264 self.entries[idx + 1].1
265 } else {
266 self.total_bytes
267 };
268 Some((start, end, has_successor))
269 }
270
271 /// Position of `marker` in `entries`. O(1) average via the
272 /// `marker_index` cache. Returns `None` if the marker is not
273 /// indexed.
274 pub fn position_of(&self, marker: OffsetMarker) -> Option<usize> {
275 self.marker_index.get(&marker).copied()
276 }
277
278 /// Marker whose byte range covers `byte`. Returns `None` if the
279 /// index is empty or `byte` falls past `total_bytes`.
280 ///
281 /// `byte == total_bytes` is treated as belonging to the last entry
282 /// (this is the cursor-at-end-of-document case).
283 pub fn marker_at_byte(&self, byte: u32) -> Option<OffsetMarker> {
284 if self.entries.is_empty() {
285 return None;
286 }
287 if byte > self.total_bytes {
288 return None;
289 }
290 let idx = match self.entries.binary_search_by_key(&byte, |(_, bs)| *bs) {
291 Ok(i) => i,
292 Err(0) => return None,
293 Err(i) => i - 1,
294 };
295 Some(self.entries[idx].0)
296 }
297
298 /// Block id whose byte range covers `byte`, ignoring table-anchor
299 /// markers. Returns `None` if no block covers the byte.
300 pub fn block_at_byte(&self, byte: u32) -> Option<EntityId> {
301 self.marker_at_byte(byte).and_then(|m| m.as_block())
302 }
303
304 /// Convert an absolute rope byte offset into
305 /// `(marker, byte_in_marker)`. Returns `None` for offsets past the
306 /// end or for an empty index.
307 pub fn byte_to_marker_byte(&self, byte: u32) -> Option<(OffsetMarker, u32)> {
308 let marker = self.marker_at_byte(byte)?;
309 let (start, _) = self.range_of(marker)?;
310 Some((marker, byte - start))
311 }
312
313 /// Block-only variant of `byte_to_marker_byte`.
314 pub fn byte_to_block_byte(&self, byte: u32) -> Option<(EntityId, u32)> {
315 let block_id = self.block_at_byte(byte)?;
316 let (start, _) = self.range_of_block(block_id)?;
317 Some((block_id, byte - start))
318 }
319
320 /// Shift every entry whose `byte_start ≥ threshold` by `delta`
321 /// bytes, and adjust `total_bytes` by `delta`. Used after a rope
322 /// insert (positive delta) or delete (negative delta) to keep the
323 /// index in sync without a full rebuild.
324 ///
325 /// `entries` is sorted by `byte_start`, so the affected suffix is
326 /// located via `partition_point` (O(log n)) and only that suffix
327 /// is walked.
328 pub fn shift_after(&mut self, threshold: u32, delta: i32) {
329 let start = self.entries.partition_point(|(_, bs)| *bs < threshold);
330 if start < self.entries.len() {
331 for (_, bs) in Arc::make_mut(&mut self.entries)[start..].iter_mut() {
332 *bs = apply_delta(*bs, delta);
333 }
334 }
335 self.total_bytes = apply_delta(self.total_bytes, delta);
336 }
337}
338
339fn apply_delta(value: u32, delta: i32) -> u32 {
340 if delta >= 0 {
341 value
342 .checked_add(delta as u32)
343 .expect("byte offset overflow")
344 } else {
345 let abs = (-delta) as u32;
346 value
347 .checked_sub(abs)
348 .expect("byte offset would go negative")
349 }
350}