common/database/block_offset_index.rs
1//! Sorted index of marker (block or table-anchor) → rope byte range.
2//!
3//! Tracks the byte offset at which each marker starts in the global
4//! rope, plus the rope's total byte length. A marker is either a
5//! real `Block` or a `TableAnchor` — the U+FFFC sentinel that
6//! occupies a line on its own in a parent frame's range to represent
7//! an embedded table (plan §1.6). Each marker extends from its
8//! `byte_start` to the next marker's `byte_start` (or to
9//! `total_bytes` for the last entry).
10//!
11//! Invariants:
12//! - `entries` is sorted by `byte_start` ascending.
13//! - No two entries share the same `byte_start`.
14//! - The last entry's `byte_start ≤ total_bytes`.
15//! - Empty `entries` ⟺ no markers in the document.
16
17use crate::types::EntityId;
18use im::HashMap as ImHashMap;
19
20/// Discriminates real blocks from table-anchor sentinels in the
21/// offset index. The wrapped `EntityId` is the corresponding entity's
22/// id (a Block id or a Table id).
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
24pub enum OffsetMarker {
25 Block(EntityId),
26 TableAnchor(EntityId),
27}
28
29impl OffsetMarker {
30 pub fn as_block(self) -> Option<EntityId> {
31 match self {
32 OffsetMarker::Block(id) => Some(id),
33 _ => None,
34 }
35 }
36
37 pub fn as_table_anchor(self) -> Option<EntityId> {
38 match self {
39 OffsetMarker::TableAnchor(id) => Some(id),
40 _ => None,
41 }
42 }
43
44 pub fn is_block(self) -> bool {
45 matches!(self, OffsetMarker::Block(_))
46 }
47}
48
49#[derive(Debug, Clone, Default, PartialEq, Eq)]
50pub struct BlockOffsetIndex {
51 /// `(marker, byte_start)` pairs sorted by `byte_start` ascending.
52 pub entries: Vec<(OffsetMarker, u32)>,
53
54 /// Total byte length of the rope this index describes. The last
55 /// entry extends from its `byte_start` to this value.
56 pub total_bytes: u32,
57
58 /// O(1)-average lookup from marker to its position in `entries`.
59 /// Maintained eagerly by the `push` / `push_block` / `insert_at` /
60 /// `remove_at` / `clear` methods on this type — direct mutation of
61 /// `entries` from outside leaves this cache stale, so callers must
62 /// go through those methods (or call `rebuild_marker_index` after
63 /// mutating in bulk).
64 ///
65 /// Stored as `im::HashMap` so the snapshot-clone path stays O(1).
66 /// `shift_after` does not move positions, only byte_starts — so
67 /// this map is unaffected by shifts.
68 marker_index: ImHashMap<OffsetMarker, usize>,
69}
70
71impl BlockOffsetIndex {
72 pub fn new() -> Self {
73 Self::default()
74 }
75
76 pub fn len(&self) -> usize {
77 self.entries.len()
78 }
79
80 pub fn is_empty(&self) -> bool {
81 self.entries.is_empty()
82 }
83
84 pub fn total_bytes(&self) -> u32 {
85 self.total_bytes
86 }
87
88 pub fn set_total_bytes(&mut self, total: u32) {
89 self.total_bytes = total;
90 }
91
92 /// Insert a marker at a given byte position. The caller is
93 /// responsible for keeping the `byte_start` ordered relative to
94 /// neighbours — this method does NOT re-sort.
95 pub fn insert_at(&mut self, position: usize, marker: OffsetMarker, byte_start: u32) {
96 self.entries.insert(position, (marker, byte_start));
97 // All markers at positions ≥ `position` shifted up by 1.
98 for (m, p) in self.marker_index.iter_mut() {
99 if *p >= position && *m != marker {
100 *p += 1;
101 }
102 }
103 self.marker_index.insert(marker, position);
104 }
105
106 /// Append a marker at the end (its `byte_start` must be ≥ the last
107 /// entry's `byte_start`).
108 pub fn push(&mut self, marker: OffsetMarker, byte_start: u32) {
109 debug_assert!(
110 self.entries
111 .last()
112 .map(|(_, bs)| byte_start >= *bs)
113 .unwrap_or(true),
114 "push must preserve ordering"
115 );
116 let position = self.entries.len();
117 self.entries.push((marker, byte_start));
118 self.marker_index.insert(marker, position);
119 }
120
121 /// Convenience: register a block by id. Equivalent to
122 /// `push(OffsetMarker::Block(id), byte_start)`.
123 pub fn push_block(&mut self, block_id: EntityId, byte_start: u32) {
124 self.push(OffsetMarker::Block(block_id), byte_start);
125 }
126
127 /// Remove the entry at the given position. Panics if out of bounds.
128 pub fn remove_at(&mut self, position: usize) -> (OffsetMarker, u32) {
129 let removed = self.entries.remove(position);
130 self.marker_index.remove(&removed.0);
131 // All markers at positions > `position` shifted down by 1.
132 for (_, p) in self.marker_index.iter_mut() {
133 if *p > position {
134 *p -= 1;
135 }
136 }
137 removed
138 }
139
140 /// Remove a contiguous range of entries, equivalent to
141 /// `entries.drain(start..=end_inclusive)` plus the matching
142 /// marker_index maintenance. Returns the removed entries.
143 pub fn drain_inclusive(
144 &mut self,
145 start: usize,
146 end_inclusive: usize,
147 ) -> Vec<(OffsetMarker, u32)> {
148 let removed: Vec<_> = self.entries.drain(start..=end_inclusive).collect();
149 for (m, _) in &removed {
150 self.marker_index.remove(m);
151 }
152 let shift = removed.len();
153 // All markers at positions > end_inclusive shift down by `shift`.
154 for (_, p) in self.marker_index.iter_mut() {
155 if *p > end_inclusive {
156 *p -= shift;
157 }
158 }
159 removed
160 }
161
162 /// Drop every entry and reset `total_bytes` to zero. Equivalent to
163 /// `*self = Self::default()` but expressed as a method so callers
164 /// don't need to depend on `Default`.
165 pub fn clear(&mut self) {
166 self.entries.clear();
167 self.marker_index = ImHashMap::new();
168 self.total_bytes = 0;
169 }
170
171 /// Rebuild `marker_index` from `entries`. Use after bulk mutations
172 /// that bypassed the maintenance methods (e.g. raw `entries.push`
173 /// in test setup).
174 pub fn rebuild_marker_index(&mut self) {
175 let mut idx = ImHashMap::new();
176 for (i, (m, _)) in self.entries.iter().enumerate() {
177 idx.insert(*m, i);
178 }
179 self.marker_index = idx;
180 }
181
182 /// Byte range `(start, end)` of a marker. `end` is the next
183 /// marker's `byte_start` (or `total_bytes` for the last entry).
184 /// Returns `None` if the marker is not indexed.
185 ///
186 /// O(1) average via the `marker_index` map.
187 pub fn range_of(&self, marker: OffsetMarker) -> Option<(u32, u32)> {
188 let idx = *self.marker_index.get(&marker)?;
189 let start = self.entries[idx].1;
190 let end = self
191 .entries
192 .get(idx + 1)
193 .map(|(_, bs)| *bs)
194 .unwrap_or(self.total_bytes);
195 Some((start, end))
196 }
197
198 /// Byte range for a block-id specifically. Convenience for the
199 /// common case.
200 pub fn range_of_block(&self, block_id: EntityId) -> Option<(u32, u32)> {
201 self.range_of(OffsetMarker::Block(block_id))
202 }
203
204 /// Like `range_of`, but also reports whether the marker has a
205 /// successor entry. Callers that need to strip the trailing
206 /// inter-block `\n` boundary (e.g. `block_content_via_store`,
207 /// `block_char_length`) use this to distinguish "end == next
208 /// marker's byte_start, with a real `\n` between us" from "end
209 /// == total_bytes, no separator after".
210 ///
211 /// O(1) average via `marker_index`.
212 pub fn range_with_successor(&self, marker: OffsetMarker) -> Option<(u32, u32, bool)> {
213 let idx = *self.marker_index.get(&marker)?;
214 let start = self.entries[idx].1;
215 let has_successor = idx + 1 < self.entries.len();
216 let end = if has_successor {
217 self.entries[idx + 1].1
218 } else {
219 self.total_bytes
220 };
221 Some((start, end, has_successor))
222 }
223
224 /// Position of `marker` in `entries`. O(1) average via the
225 /// `marker_index` cache. Returns `None` if the marker is not
226 /// indexed.
227 pub fn position_of(&self, marker: OffsetMarker) -> Option<usize> {
228 self.marker_index.get(&marker).copied()
229 }
230
231 /// Marker whose byte range covers `byte`. Returns `None` if the
232 /// index is empty or `byte` falls past `total_bytes`.
233 ///
234 /// `byte == total_bytes` is treated as belonging to the last entry
235 /// (this is the cursor-at-end-of-document case).
236 pub fn marker_at_byte(&self, byte: u32) -> Option<OffsetMarker> {
237 if self.entries.is_empty() {
238 return None;
239 }
240 if byte > self.total_bytes {
241 return None;
242 }
243 let idx = match self.entries.binary_search_by_key(&byte, |(_, bs)| *bs) {
244 Ok(i) => i,
245 Err(0) => return None,
246 Err(i) => i - 1,
247 };
248 Some(self.entries[idx].0)
249 }
250
251 /// Block id whose byte range covers `byte`, ignoring table-anchor
252 /// markers. Returns `None` if no block covers the byte.
253 pub fn block_at_byte(&self, byte: u32) -> Option<EntityId> {
254 self.marker_at_byte(byte).and_then(|m| m.as_block())
255 }
256
257 /// Convert an absolute rope byte offset into
258 /// `(marker, byte_in_marker)`. Returns `None` for offsets past the
259 /// end or for an empty index.
260 pub fn byte_to_marker_byte(&self, byte: u32) -> Option<(OffsetMarker, u32)> {
261 let marker = self.marker_at_byte(byte)?;
262 let (start, _) = self.range_of(marker)?;
263 Some((marker, byte - start))
264 }
265
266 /// Block-only variant of `byte_to_marker_byte`.
267 pub fn byte_to_block_byte(&self, byte: u32) -> Option<(EntityId, u32)> {
268 let block_id = self.block_at_byte(byte)?;
269 let (start, _) = self.range_of_block(block_id)?;
270 Some((block_id, byte - start))
271 }
272
273 /// Shift every entry whose `byte_start ≥ threshold` by `delta`
274 /// bytes, and adjust `total_bytes` by `delta`. Used after a rope
275 /// insert (positive delta) or delete (negative delta) to keep the
276 /// index in sync without a full rebuild.
277 pub fn shift_after(&mut self, threshold: u32, delta: i32) {
278 for (_, bs) in self.entries.iter_mut() {
279 if *bs >= threshold {
280 *bs = apply_delta(*bs, delta);
281 }
282 }
283 self.total_bytes = apply_delta(self.total_bytes, delta);
284 }
285}
286
287fn apply_delta(value: u32, delta: i32) -> u32 {
288 if delta >= 0 {
289 value
290 .checked_add(delta as u32)
291 .expect("byte offset overflow")
292 } else {
293 let abs = (-delta) as u32;
294 value
295 .checked_sub(abs)
296 .expect("byte offset would go negative")
297 }
298}