common/database/rope_helpers.rs
1//! Helpers for writing the global character rope from use cases.
2//!
3//! Each helper mutates `store.rope` and `store.block_offsets`
4//! together so callers can stay oblivious to the underlying layout.
5//! Read helpers (`block_content_via_store`) source content from the
6//! rope and return empty when a block is not yet registered in the
7//! offset index.
8
9use crate::database::Store;
10use crate::database::block_offset_index::OffsetMarker;
11use crate::entities::Block;
12use crate::types::EntityId;
13
14/// Read a block's content from the global rope via `block_offsets`,
15/// stripping the trailing `\n` boundary that `range_of` includes for
16/// non-last entries. Returns an empty string if the block isn't
17/// registered in the offset index (e.g. a freshly-created block that
18/// hasn't been spliced into the rope yet — `setup_with_text` test
19/// docs use this path).
20pub fn block_content_via_store(block: &Block, store: &Store) -> String {
21 let offsets = store.block_offsets.read().unwrap();
22 let marker = OffsetMarker::Block(block.id);
23 let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
24 return String::new();
25 };
26 // Drop the trailing inter-block boundary `\n` ONLY when this block
27 // has a successor entry — that one byte is the boundary `\n` between
28 // this block and the next. The last entry has no trailing boundary;
29 // any final `\n` is real content.
30 let content_end = if has_successor && be > bs { be - 1 } else { be };
31 drop(offsets);
32 let rope = store.rope.read().unwrap();
33 rope.byte_slice(bs as usize..content_end as usize)
34 .to_string()
35}
36
37/// Logical character count of a block — what the old
38/// `Block.text_length` field used to cache. Image anchors are stored as
39/// `\u{FFFC}` (one char, three bytes) inside the rope content, so the
40/// char count already covers them. Returns 0 for blocks not registered
41/// in the offset index.
42///
43/// O(log n) via `ropey::Rope::byte_to_char` — does NOT materialize the
44/// block's text into a String. Replaces the prior O(L) implementation
45/// that counted chars by walking UTF-8 over a cloned slice.
46pub fn block_char_length(block: &Block, store: &Store) -> i64 {
47 let offsets = store.block_offsets.read().unwrap();
48 let marker = OffsetMarker::Block(block.id);
49 let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
50 return 0;
51 };
52 let content_end_bytes = if has_successor && be > bs { be - 1 } else { be };
53 drop(offsets);
54 let rope = store.rope.read().unwrap();
55 let char_start = rope.byte_to_char(bs as usize);
56 let char_end = rope.byte_to_char(content_end_bytes as usize);
57 (char_end - char_start) as i64
58}
59
60/// Return the absolute character position of a block's start in the
61/// document, derived from the rope via `BlockOffsetIndex`.
62///
63/// O(log n): one `range_of_block` lookup + one `byte_to_char` conversion.
64/// Falls back to `block.document_position` for blocks not registered in
65/// the index — currently table-cell blocks (plan §1.6 deferred) and
66/// blocks under non-top-level frames (`insert_frame_uc` only mirrors
67/// top-level frames). The stored field is set authoritatively by those
68/// non-flow paths and stays correct for them across main-flow edits.
69pub fn block_document_position(block: &Block, store: &Store) -> i64 {
70 let offsets = store.block_offsets.read().unwrap();
71 let Some((byte_start, _)) = offsets.range_of_block(block.id) else {
72 return block.document_position;
73 };
74 drop(offsets);
75 let rope = store.rope.read().unwrap();
76 rope.byte_to_char(byte_start as usize) as i64
77}
78
79/// Whether the rope's char-position space matches the user-visible
80/// flow positions that `Block.document_position` is computed against.
81///
82/// They match when every block is mirrored to the rope as a contiguous
83/// run. They DON'T match when:
84/// 1. The document contains tables — cell content sits at separate rope
85/// byte ranges (plan §1.6), so the rope is missing the cells'
86/// flow-position contribution.
87/// 2. The document contains sub-frames whose blocks aren't mirrored —
88/// `insert_frame_uc` only mirrors top-level frames.
89///
90/// When this returns `true`, readers can derive `document_position`
91/// directly from the rope (O(log n)) and the use-case-side
92/// position-refresh loops can be skipped. When it returns `false`,
93/// readers must consult the maintained `Block.document_position`
94/// stored field, and the loops are required to keep it correct.
95pub fn rope_positions_match_flow(store: &Store) -> bool {
96 let offsets = store.block_offsets.read().unwrap();
97 if offsets.table_anchor_count() > 0 {
98 return false;
99 }
100 // With no table anchors, every entry is a Block marker, so
101 // `entries.len()` equals the indexed block count.
102 let indexed_block_count = offsets.entries.len();
103 drop(offsets);
104 let total_block_count = store.blocks.read().unwrap().len();
105 indexed_block_count == total_block_count
106}
107
108/// Locate which block contains a given absolute char position in the
109/// document, returning `(block_id, char_offset_in_block, block_char_start)`
110/// in O(log n) using the rope + `BlockOffsetIndex` instead of an O(N)
111/// linear walk of all blocks.
112///
113/// Replaces the per-keystroke hot path in editing use cases
114/// (`find_block_at_position_sequential`) which fetched every block
115/// + called `block_char_length` per block. For an N-block document
116/// each editor keystroke now costs O(log n) lookups instead of O(N).
117///
118/// Returns `None` for documents containing tables — table cell content
119/// lives in separate byte ranges later in the rope (plan §1.6), so the
120/// rope's char-position space diverges from the user-visible flow
121/// order. Callers must fall back to the slow per-block walk in that
122/// case.
123///
124/// `position` past the document end clamps to the last block's
125/// end-of-content.
126pub fn find_block_at_char_position(store: &Store, position: i64) -> Option<(EntityId, i64, i64)> {
127 // Fast path is only valid when EVERY block in the document is
128 // mirrored to the rope. Disqualifying cases:
129 //
130 // 1. Documents containing tables — cell content sits at separate
131 // rope byte ranges (plan §1.6) so byte→block lookup finds the
132 // wrong block for cursor positions inside cells.
133 //
134 // 2. Documents containing sub-frames inserted with a parent —
135 // `insert_frame_uc` currently only mirrors top-level frames to
136 // the rope (parent=None case), leaving sub-frame blocks
137 // unregistered in the offset index. Detect by comparing block
138 // counts: if the rope index has fewer Block markers than the
139 // store has Block entities, some are unmirrored.
140 let offsets = store.block_offsets.read().unwrap();
141 if offsets.table_anchor_count() > 0 {
142 return None;
143 }
144 // No table anchors → every entry is a Block marker, so the indexed
145 // block count is simply `entries.len()`. O(1) instead of an O(N)
146 // filter-count on every keystroke.
147 let indexed_block_count = offsets.entries.len();
148 let total_block_count = store.blocks.read().unwrap().len();
149 if indexed_block_count != total_block_count {
150 return None;
151 }
152 drop(offsets);
153
154 let rope = store.rope.read().unwrap();
155 let total_chars = rope.len_chars() as i64;
156 let pos_clamped = position.clamp(0, total_chars);
157 let abs_byte = rope.char_to_byte(pos_clamped as usize);
158 drop(rope);
159
160 let offsets = store.block_offsets.read().unwrap();
161 let block_id = offsets.marker_at_byte(abs_byte as u32)?.as_block()?;
162 let (bs, be, has_successor) = offsets.range_with_successor(OffsetMarker::Block(block_id))?;
163 let content_end = if has_successor && be > bs { be - 1 } else { be };
164
165 drop(offsets);
166
167 let rope = store.rope.read().unwrap();
168 let block_char_start = rope.byte_to_char(bs as usize) as i64;
169 // Clamp the cursor's byte to the block's content area (not into the
170 // trailing `\n` boundary if any).
171 let byte_for_char = std::cmp::min(abs_byte, content_end as usize);
172 let abs_char = rope.byte_to_char(byte_for_char) as i64;
173 let char_in_block = abs_char - block_char_start;
174
175 // NOTE: separator semantics differ across use cases. Callers like
176 // `get_block_at_position_uc` interpret "position at the end of a
177 // non-empty block with a successor" as "belongs to the next
178 // block"; callers like `insert_text_uc` want the previous block
179 // with offset == block_char_len. This helper returns the
180 // previous-block answer (the simpler semantic); use-case-specific
181 // advance logic lives at the call site.
182 let _ = has_successor;
183
184 Some((block_id, char_in_block, block_char_start))
185}
186
187/// Convert an in-block char offset to an in-block byte offset using the
188/// rope's index. Both inputs and outputs are relative to the start of
189/// the block's content (NOT absolute rope positions). The char offset
190/// is clamped to the block's logical length so callers don't need to
191/// pre-validate.
192///
193/// O(log n) via `ropey::Rope::char_to_byte` — replaces the O(L)
194/// "materialize block text + walk char_indices" pattern that
195/// `set_text_format_uc` / `merge_text_format_uc` used. Returns
196/// `(byte_offset_in_block, content_byte_len)` so callers can also
197/// pass the content length to `debug_assert_well_formed` without
198/// materializing the text.
199///
200/// Returns `(0, 0)` for blocks not registered in the offset index.
201pub fn block_char_to_byte_in_block(
202 store: &Store,
203 block_id: EntityId,
204 char_offset: usize,
205) -> (u32, usize) {
206 let offsets = store.block_offsets.read().unwrap();
207 let marker = OffsetMarker::Block(block_id);
208 let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
209 return (0, 0);
210 };
211 let content_end_bytes = if has_successor && be > bs { be - 1 } else { be };
212 let content_byte_len = (content_end_bytes - bs) as usize;
213 drop(offsets);
214
215 let rope = store.rope.read().unwrap();
216 let block_char_start = rope.byte_to_char(bs as usize);
217 let block_char_end = rope.byte_to_char(content_end_bytes as usize);
218 let block_char_len = block_char_end - block_char_start;
219
220 // Clamp char_offset to block's char length.
221 let clamped = std::cmp::min(char_offset, block_char_len);
222 let abs_byte = rope.char_to_byte(block_char_start + clamped);
223 let byte_in_block = (abs_byte - bs as usize) as u32;
224 (byte_in_block, content_byte_len)
225}
226
227/// Fast-path full-document plain text: returns `Some(rope.to_string())`
228/// iff the document is in the canonical flat layout — no table anchors
229/// in the offset index, single top-level frame. In that case the
230/// rope's byte order is the same as the document-flow order, so one
231/// `to_string()` allocation replaces the O(N) per-block walk +
232/// per-block `Cow<str>` materialization that `build_full_text` /
233/// `export_plain_text` would otherwise do.
234///
235/// Returns `None` for documents containing tables or nested frames —
236/// those require the per-frame, per-block traversal because table
237/// cell content lives in separate byte ranges later in the rope
238/// (plan §1.6), not interleaved with the parent frame's bytes.
239///
240/// `top_frame_count` is the caller-known number of top-level frames
241/// (typically obtained from `Document.frames.len()`). Callers
242/// already have this value and pass it in to avoid a redundant uow
243/// query.
244pub fn rope_flat_text_if_simple(store: &Store, top_frame_count: usize) -> Option<String> {
245 if top_frame_count != 1 {
246 return None;
247 }
248 let offsets = store.block_offsets.read().unwrap();
249 let has_table = offsets
250 .entries
251 .iter()
252 .any(|(m, _)| matches!(m, OffsetMarker::TableAnchor(_)));
253 if has_table {
254 return None;
255 }
256 drop(offsets);
257 Some(store.rope.read().unwrap().to_string())
258}
259
260/// Reset the rope to empty and clear `block_offsets`. Called by
261/// importers when they replace the entire document content.
262pub fn rope_reset(store: &Store) {
263 *store.rope.write().unwrap() = ropey::Rope::new();
264 *store.block_offsets.write().unwrap() =
265 crate::database::block_offset_index::BlockOffsetIndex::new();
266}
267
268/// Append `text` to the end of the rope and register `block_id` at
269/// the byte position where the text starts. Returns that byte offset.
270///
271/// Callers are responsible for inserting an inter-block `\n`
272/// (`rope_insert_block_boundary`) before each block AFTER the first
273/// in a contiguous frame.
274pub fn rope_append_block(store: &Store, block_id: EntityId, text: &str) -> u32 {
275 let mut rope = store.rope.write().unwrap();
276 let byte_start = rope.len_bytes() as u32;
277 let char_end = rope.len_chars();
278 rope.insert(char_end, text);
279 let new_total = rope.len_bytes() as u32;
280 drop(rope);
281
282 let mut offsets = store.block_offsets.write().unwrap();
283 offsets.push_block(block_id, byte_start);
284 offsets.set_total_bytes(new_total);
285 byte_start
286}
287
288/// Insert `text` as a new block at `byte_pos` in the rope, prepending
289/// a `\n` boundary. Used by `insert_table_uc` to place cell blocks at
290/// the end of their containing top-level frame's range (plan §1.6),
291/// rather than always at rope end.
292///
293/// Total bytes inserted: `1 + text.len()`. The block's content
294/// occupies `[byte_pos + 1, byte_pos + 1 + text.len())`. The block
295/// entry is registered at `byte_pos + 1` in `block_offsets`.
296///
297/// Existing entries with `byte_start == byte_pos` (e.g. a previous
298/// empty block whose end coincides with this insertion point) are
299/// kept BEFORE the new entry in the Vec, since the inserted `\n`
300/// boundary belongs after them. Entries strictly past `byte_pos`
301/// shift forward by `(1 + text.len())` bytes.
302///
303/// When `byte_pos == total_bytes`, behaves like
304/// `rope_insert_block_boundary` followed by `rope_append_block`.
305pub fn rope_insert_block_at(store: &Store, byte_pos: u32, block_id: EntityId, text: &str) {
306 let delta = (1 + text.len()) as i32;
307 // Vec position: insert AFTER any entry at byte_pos itself
308 // (those represent earlier empty blocks whose `\n` boundary
309 // we are placing now). Only entries strictly past byte_pos
310 // come after our new entry in the Vec.
311 let new_entry_vec_pos = {
312 let offsets = store.block_offsets.read().unwrap();
313 offsets
314 .entries
315 .iter()
316 .position(|(_, bs)| *bs > byte_pos)
317 .unwrap_or(offsets.entries.len())
318 };
319 {
320 let mut rope = store.rope.write().unwrap();
321 let char_idx = rope.byte_to_char(byte_pos as usize);
322 let mut combined = String::with_capacity(1 + text.len());
323 combined.push('\n');
324 combined.push_str(text);
325 rope.insert(char_idx, &combined);
326 }
327 let mut offsets = store.block_offsets.write().unwrap();
328 // Shift entries strictly past byte_pos. Entries AT byte_pos
329 // (the prior empty block) stay where they are — the new `\n`
330 // is conceptually "after" them.
331 offsets.shift_after(byte_pos + 1, delta);
332 offsets.insert_at(
333 new_entry_vec_pos,
334 OffsetMarker::Block(block_id),
335 byte_pos + 1,
336 );
337}
338
339/// Walks up `frame.parent_frame` to find the top-level ancestor of
340/// the given frame, then returns the end byte of that top-level
341/// frame's current rope range — i.e. the byte position where blocks
342/// belonging to that frame's subtree (e.g. table cells per plan §1.6)
343/// should be inserted so they land BEFORE any following top-level
344/// frame's content.
345///
346/// Reads `block_offsets`/`frames`/`tables`/`table_cells` directly, so
347/// the result is fresh even when `Frame.byte_range` has not yet been
348/// recomputed at commit time.
349pub fn top_level_frame_end_byte(store: &Store, frame_id: EntityId) -> u32 {
350 let top_id = {
351 let frames = store.frames.read().unwrap();
352 let mut current = frame_id;
353 loop {
354 let Some(f) = frames.get(¤t) else {
355 return 0;
356 };
357 match f.parent_frame {
358 None => break current,
359 Some(p) => current = p,
360 }
361 }
362 };
363 let (_min, max) = compute_frame_byte_range_recursive(store, top_id);
364 max
365}
366
367/// Append a new empty block to the end of the rope, separating it
368/// from any prior content with a `\n` boundary (only if the rope is
369/// already non-empty). Registers `block_id` at the resulting byte
370/// position. Returns that byte position. Used when `insert_frame_uc`
371/// creates a new top-level frame with a single empty block.
372pub fn rope_append_empty_block(store: &Store, block_id: EntityId) -> u32 {
373 let was_empty = store.rope.read().unwrap().len_bytes() == 0;
374 if !was_empty {
375 rope_insert_block_boundary(store);
376 }
377 let pos = store.rope.read().unwrap().len_bytes() as u32;
378 let mut offsets = store.block_offsets.write().unwrap();
379 offsets.push_block(block_id, pos);
380 offsets.set_total_bytes(pos);
381 pos
382}
383
384/// Append a single `\n` inter-block boundary character to the end of
385/// the rope. Does NOT register a block — this is the sentinel between
386/// two adjacent blocks within the same frame (plan §1.4).
387pub fn rope_insert_block_boundary(store: &Store) {
388 let mut rope = store.rope.write().unwrap();
389 let char_end = rope.len_chars();
390 rope.insert(char_end, "\n");
391 let new_total = rope.len_bytes() as u32;
392 drop(rope);
393
394 store
395 .block_offsets
396 .write()
397 .unwrap()
398 .set_total_bytes(new_total);
399}
400
401/// Insert `text` at `byte_offset_in_block` inside the block identified
402/// by `block_id`. Looks up the block's start in the rope via
403/// `block_offsets.range_of()`, splices into the rope, and shifts
404/// subsequent block offsets by the inserted byte length.
405///
406/// Silently no-ops if the block is not registered in the offset index
407/// (this can happen for blocks whose content lives outside the global
408/// rope, e.g. table cells until step 5.5).
409pub fn rope_insert_in_block(
410 store: &Store,
411 block_id: EntityId,
412 byte_offset_in_block: u32,
413 text: &str,
414) {
415 let inserted_bytes = text.len() as u32;
416 if inserted_bytes == 0 {
417 return;
418 }
419 let block_byte_start = {
420 let offsets = store.block_offsets.read().unwrap();
421 let Some((start, _end)) = offsets.range_of_block(block_id) else {
422 return;
423 };
424 start
425 };
426 let rope_byte = block_byte_start + byte_offset_in_block;
427 {
428 let mut rope = store.rope.write().unwrap();
429 let char_idx = rope.byte_to_char(rope_byte as usize);
430 rope.insert(char_idx, text);
431 }
432 // Shift entries past this block by inserted_bytes. Threshold
433 // is one byte past block_byte_start so the current block's own
434 // entry isn't moved.
435 store
436 .block_offsets
437 .write()
438 .unwrap()
439 .shift_after(block_byte_start + 1, inserted_bytes as i32);
440}
441
442/// Split an existing block in the rope at `byte_offset_in_block`:
443/// - inserts a `\n` inter-block boundary at the absolute byte position
444/// `block_start + byte_offset_in_block` in the rope
445/// - shifts entries past that position by +1 byte
446/// - inserts a new entry for `new_block_id` at
447/// `block_start + byte_offset_in_block + 1` (right after the newline),
448/// placed immediately after the original block in the entries Vec
449///
450/// `byte_offset_in_block` may be 0 (split before first char of block,
451/// i.e. insert empty block before this one) or equal to the block's
452/// byte length (split after last char, i.e. insert empty block after).
453pub fn rope_split_block(
454 store: &Store,
455 current_block_id: EntityId,
456 byte_offset_in_block: u32,
457 new_block_id: EntityId,
458) {
459 let current_marker = OffsetMarker::Block(current_block_id);
460 let (block_start, current_idx) = {
461 let offsets = store.block_offsets.read().unwrap();
462 let Some((start, _end)) = offsets.range_of(current_marker) else {
463 return;
464 };
465 let idx = offsets
466 .entries
467 .iter()
468 .position(|(m, _)| *m == current_marker)
469 .unwrap();
470 (start, idx)
471 };
472 let split_byte = block_start + byte_offset_in_block;
473
474 // 1. Insert the `\n` boundary at the split point.
475 {
476 let mut rope = store.rope.write().unwrap();
477 let char_idx = rope.byte_to_char(split_byte as usize);
478 rope.insert(char_idx, "\n");
479 }
480
481 // 2. Shift entries past the split (and total_bytes) by +1.
482 // Threshold > split_byte so the new entry we insert next
483 // isn't double-shifted.
484 store
485 .block_offsets
486 .write()
487 .unwrap()
488 .shift_after(split_byte + 1, 1);
489
490 // 3. Register the new block at `split_byte + 1`, immediately
491 // after the original in the entries Vec.
492 store.block_offsets.write().unwrap().insert_at(
493 current_idx + 1,
494 OffsetMarker::Block(new_block_id),
495 split_byte + 1,
496 );
497}
498
499/// Merge `start_block` and `end_block` by deleting the rope range
500/// `[start_block.start + byte_so .. end_block.start + byte_eo)` — i.e.
501/// the suffix of `start_block`, every block between (and their
502/// boundary newlines), and the prefix of `end_block`. Removes the
503/// index entries for every block strictly between `start_block` and
504/// `end_block` (inclusive of `end_block` itself); the surviving
505/// content lives in `start_block`. Shifts any blocks past `end_block`
506/// by the negative delta.
507///
508/// No-op if `start_block` is not in the index. Skipped for any
509/// intermediate block id whose range is missing from the index (e.g.
510/// table cells until step 5.5e).
511pub fn rope_merge_block_range(
512 store: &Store,
513 start_block_id: EntityId,
514 byte_so_in_start: u32,
515 end_block_id: EntityId,
516 byte_eo_in_end: u32,
517) {
518 let start_marker = OffsetMarker::Block(start_block_id);
519 let end_marker = OffsetMarker::Block(end_block_id);
520 let (start_block_byte, end_block_byte, start_idx, end_idx) = {
521 let offsets = store.block_offsets.read().unwrap();
522 let Some((sb, _)) = offsets.range_of(start_marker) else {
523 return;
524 };
525 let Some((eb, _)) = offsets.range_of(end_marker) else {
526 return;
527 };
528 let si = offsets
529 .entries
530 .iter()
531 .position(|(m, _)| *m == start_marker)
532 .unwrap();
533 let ei = offsets
534 .entries
535 .iter()
536 .position(|(m, _)| *m == end_marker)
537 .unwrap();
538 (sb, eb, si, ei)
539 };
540 if end_idx <= start_idx {
541 return;
542 }
543
544 let delete_start = start_block_byte + byte_so_in_start;
545 let delete_end = end_block_byte + byte_eo_in_end;
546 if delete_end <= delete_start {
547 return;
548 }
549 let deleted_bytes = delete_end - delete_start;
550
551 // 1. Remove the rope range.
552 {
553 let mut rope = store.rope.write().unwrap();
554 let char_start = rope.byte_to_char(delete_start as usize);
555 let char_end = rope.byte_to_char(delete_end as usize);
556 rope.remove(char_start..char_end);
557 }
558
559 // 2. Remove block_offsets entries for [start_idx+1 ..= end_idx].
560 {
561 let mut offsets = store.block_offsets.write().unwrap();
562 offsets.drain_inclusive(start_idx + 1, end_idx);
563 }
564
565 // 3. Shift any remaining entries past the deletion by -deleted_bytes.
566 // Threshold > delete_start because start_block's own entry
567 // sits at delete_start - byte_so_in_start (≤ delete_start)
568 // and must not move.
569 store
570 .block_offsets
571 .write()
572 .unwrap()
573 .shift_after(delete_start + 1, -(deleted_bytes as i32));
574}
575
576/// Insert a U+FFFC OBJECT REPLACEMENT CHARACTER sentinel in the rope
577/// at the table-anchor position, registering a `TableAnchor(table_id)`
578/// marker in the offset index (plan §1.6).
579///
580/// `target_block_id` is the block in the parent frame that the table
581/// is adjacent to. `after` controls whether the table goes BEFORE
582/// (`after = false`) or AFTER the target block.
583///
584/// The 3-byte sentinel is paired with an inter-marker `\n`:
585/// - `after = false`: inserts `\u{FFFC}\n` at `target.byte_start`
586/// - `after = true`, target is NOT the last entry: inserts
587/// `\u{FFFC}\n` at `target.byte_end` (between target's trailing
588/// `\n` and the next entry)
589/// - `after = true`, target IS the last entry: inserts `\n\u{FFFC}`
590/// at `target.byte_end` (rope now ends with the sentinel)
591///
592/// NOTE: cell-internal content is not yet routed through the rope —
593/// the rope reflects table *presence* (3-byte sentinel) only.
594/// Routing cell content is deferred to plan §1.6's `Frame.byte_range`
595/// model.
596///
597/// No-op if `target_block_id` is not in the index.
598pub fn rope_insert_table_anchor(
599 store: &Store,
600 table_id: EntityId,
601 target_block_id: EntityId,
602 after: bool,
603) {
604 const SENTINEL: &str = "\u{FFFC}"; // 3 bytes
605 const SENTINEL_BYTES: u32 = 3;
606
607 let (insert_pos, target_idx, target_is_last) = {
608 let offsets = store.block_offsets.read().unwrap();
609 let target_marker = OffsetMarker::Block(target_block_id);
610 let Some((start, end)) = offsets.range_of(target_marker) else {
611 return;
612 };
613 let idx = offsets
614 .entries
615 .iter()
616 .position(|(m, _)| *m == target_marker)
617 .unwrap();
618 let is_last = idx + 1 == offsets.entries.len();
619 let pos = if after { end } else { start };
620 (pos, idx, is_last)
621 };
622
623 // Insertion strategy:
624 let (rope_inserted, marker_byte_start, new_entry_pos, shift_threshold, shift_delta) = if !after
625 {
626 // Before target: "\u{FFFC}\n" at target.byte_start
627 ("\u{FFFC}\n", insert_pos, target_idx, insert_pos, 4i32)
628 } else if !target_is_last {
629 // After target, with following entries: "\u{FFFC}\n"
630 // at target.byte_end. The TableAnchor sits where the
631 // next block USED to start; that following entry
632 // shifts by 4.
633 ("\u{FFFC}\n", insert_pos, target_idx + 1, insert_pos, 4i32)
634 } else {
635 // After target which is last: "\n\u{FFFC}" appended.
636 // TableAnchor's byte_start sits 1 past the original
637 // total (after the new `\n`).
638 (
639 "\n\u{FFFC}",
640 insert_pos + 1,
641 target_idx + 1,
642 insert_pos,
643 4i32,
644 )
645 };
646
647 // 1. Splice the literal bytes into the rope.
648 {
649 let mut rope = store.rope.write().unwrap();
650 let char_idx = rope.byte_to_char(insert_pos as usize);
651 rope.insert(char_idx, rope_inserted);
652 }
653
654 // 2. Shift entries past the insertion point. Use shift_after
655 // BEFORE inserting our new entry so we don't double-shift.
656 store
657 .block_offsets
658 .write()
659 .unwrap()
660 .shift_after(shift_threshold, shift_delta);
661
662 // 3. Register the TableAnchor at the resolved byte position
663 // and the resolved Vec position.
664 store.block_offsets.write().unwrap().insert_at(
665 new_entry_pos,
666 OffsetMarker::TableAnchor(table_id),
667 marker_byte_start,
668 );
669
670 // Note: SENTINEL_BYTES is part of `shift_delta` (3 for the
671 // sentinel + 1 for the `\n`).
672 let _ = SENTINEL;
673 let _ = SENTINEL_BYTES;
674}
675
676/// Append a U+FFFC table-anchor sentinel at the end of the rope and
677/// register a `TableAnchor(table_id)` marker. If the rope is already
678/// non-empty, prepends a `\n` boundary so the sentinel doesn't run
679/// into the previous entry's content.
680///
681/// Used by import paths (`import_html_uc`, `import_markdown_uc`)
682/// that process the document linearly and append entities as they
683/// encounter them, rather than inserting relative to an existing
684/// target block.
685pub fn rope_append_table_anchor(store: &Store, table_id: EntityId) {
686 let (anchor_byte_start, new_total) = {
687 let mut rope = store.rope.write().unwrap();
688 let was_empty = rope.len_bytes() == 0;
689 let char_end = rope.len_chars();
690 let to_insert = if was_empty { "\u{FFFC}" } else { "\n\u{FFFC}" };
691 rope.insert(char_end, to_insert);
692 let new_total = rope.len_bytes() as u32;
693 // Sentinel is 3 bytes; if a `\n` was prepended that's 1 byte
694 // before the sentinel.
695 let anchor_byte_start = new_total - 3;
696 (anchor_byte_start, new_total)
697 };
698
699 let mut offsets = store.block_offsets.write().unwrap();
700 offsets.push(OffsetMarker::TableAnchor(table_id), anchor_byte_start);
701 offsets.set_total_bytes(new_total);
702}
703
704/// Remove a TableAnchor sentinel from the rope, undoing the effect
705/// of `rope_insert_table_anchor`. Looks up the anchor's byte range
706/// (always 3 bytes for the U+FFFC plus 1 byte of inter-marker `\n`
707/// either before or after, depending on what's adjacent), removes
708/// those 4 bytes from the rope, drops the entry, shifts trailing
709/// entries by -4.
710///
711/// No-op if no TableAnchor for `table_id` exists.
712pub fn rope_remove_table_anchor(store: &Store, table_id: EntityId) {
713 let anchor_marker = OffsetMarker::TableAnchor(table_id);
714 let (anchor_byte_start, anchor_idx, anchor_is_last, has_predecessor) = {
715 let offsets = store.block_offsets.read().unwrap();
716 let Some((start, _end)) = offsets.range_of(anchor_marker) else {
717 return;
718 };
719 let idx = offsets
720 .entries
721 .iter()
722 .position(|(m, _)| *m == anchor_marker)
723 .unwrap();
724 let is_last = idx + 1 == offsets.entries.len();
725 let has_pred = idx > 0;
726 (start, idx, is_last, has_pred)
727 };
728
729 // Symmetric to insert_table_anchor. The 4 bytes to remove are:
730 // - if anchor is last: [byte_start - 1 .. byte_start + 3) — the
731 // preceding `\n` + the 3-byte sentinel
732 // - otherwise: [byte_start .. byte_start + 4) — the sentinel
733 // + the following `\n`
734 let (remove_start, remove_end) = if anchor_is_last && has_predecessor {
735 (anchor_byte_start - 1, anchor_byte_start + 3)
736 } else {
737 (anchor_byte_start, anchor_byte_start + 4)
738 };
739
740 {
741 let mut rope = store.rope.write().unwrap();
742 let char_start = rope.byte_to_char(remove_start as usize);
743 let char_end = rope.byte_to_char(remove_end as usize);
744 rope.remove(char_start..char_end);
745 }
746 {
747 let mut offsets = store.block_offsets.write().unwrap();
748 offsets.remove_at(anchor_idx);
749 }
750 store
751 .block_offsets
752 .write()
753 .unwrap()
754 .shift_after(remove_start, -4);
755}
756
757/// Remove a registered block from the rope: drops its content bytes
758/// plus one boundary `\n` (the one after, if the block has a
759/// successor; the one before, if it's the last entry), removes the
760/// entry from the index, and shifts trailing entries by the negative
761/// byte delta.
762///
763/// No-op if `block_id` is not in the index. No-op for the special
764/// case of a single-block document being asked to remove its sole
765/// block (we'd produce an empty rope but the block itself is being
766/// cascaded by the caller).
767pub fn rope_remove_block(store: &Store, block_id: EntityId) {
768 let block_marker = OffsetMarker::Block(block_id);
769 let (block_start, block_end, idx, is_last, has_pred) = {
770 let offsets = store.block_offsets.read().unwrap();
771 let Some((start, end)) = offsets.range_of(block_marker) else {
772 return;
773 };
774 let idx = offsets
775 .entries
776 .iter()
777 .position(|(m, _)| *m == block_marker)
778 .unwrap();
779 let is_last = idx + 1 == offsets.entries.len();
780 let has_pred = idx > 0;
781 (start, end, idx, is_last, has_pred)
782 };
783
784 // Determine the byte range to delete:
785 // - if there's a successor: [block_start..block_end) — the
786 // block's content INCLUDING its trailing boundary `\n`
787 // (which is the byte at block_end - 1)
788 // - if last and has predecessor: [block_start - 1..block_end)
789 // — also delete the LEADING boundary `\n` that the previous
790 // entry placed before us
791 // - if last and no predecessor (sole entry): just delete
792 // [block_start..block_end) (no boundary `\n` exists)
793 let (remove_start, remove_end) = if is_last && has_pred {
794 (block_start.saturating_sub(1), block_end)
795 } else {
796 (block_start, block_end)
797 };
798 if remove_end <= remove_start {
799 // Drop the entry only; nothing to remove from the rope.
800 store.block_offsets.write().unwrap().remove_at(idx);
801 return;
802 }
803 let deleted_bytes = remove_end - remove_start;
804
805 {
806 let mut rope = store.rope.write().unwrap();
807 let char_start = rope.byte_to_char(remove_start as usize);
808 let char_end = rope.byte_to_char(remove_end as usize);
809 rope.remove(char_start..char_end);
810 }
811 store.block_offsets.write().unwrap().remove_at(idx);
812 // Shift entries STRICTLY PAST the removed range. Using `remove_end` as
813 // the threshold (rather than `remove_start`) keeps an empty predecessor
814 // whose byte_start equals `remove_start` (the leading boundary `\n` we
815 // just deleted) in place — its content position is unchanged, only its
816 // trailing boundary is gone. `total_bytes` decreases by `deleted_bytes`
817 // regardless of threshold.
818 store
819 .block_offsets
820 .write()
821 .unwrap()
822 .shift_after(remove_end, -(deleted_bytes as i32));
823}
824
825/// Replace the entire content of a registered block in the rope with
826/// `new_text`. Preserves the block's `byte_start` and its trailing
827/// boundary `\n` (if any); subsequent entries shift by the net
828/// length delta.
829///
830/// Used by use cases that compute a block's final content as a string
831/// and want to push that content to the rope in one shot — e.g. the
832/// block-splitting branches of `insert_html_at_position_uc` and
833/// `insert_markdown_at_position_uc`, where each affected block
834/// (head, tail, mid-replacement) gets a single new value.
835///
836/// No-op if `block_id` is not in the index.
837pub fn rope_replace_block_content(store: &Store, block_id: EntityId, new_text: &str) {
838 let (block_byte_start, content_bytes) = {
839 let offsets = store.block_offsets.read().unwrap();
840 let Some((start, end)) = offsets.range_of_block(block_id) else {
841 return;
842 };
843 let total = offsets.total_bytes();
844 // `range_of` extends to the next entry's `byte_start` (or to
845 // `total_bytes`). If there's a following entry, the byte at
846 // `end - 1` is the inter-block boundary `\n` that belongs to
847 // the boundary between this block and the next, not to this
848 // block's content.
849 let has_trailing_boundary = end < total;
850 let content_bytes = if has_trailing_boundary {
851 end - start - 1
852 } else {
853 end - start
854 };
855 (start, content_bytes)
856 };
857
858 let new_bytes = new_text.len() as u32;
859 if content_bytes == 0 && new_bytes == 0 {
860 return;
861 }
862 let delta = new_bytes as i32 - content_bytes as i32;
863
864 // Splice [block_byte_start..block_byte_start + content_bytes)
865 // with `new_text`.
866 {
867 let mut rope = store.rope.write().unwrap();
868 let char_start = rope.byte_to_char(block_byte_start as usize);
869 if content_bytes > 0 {
870 let char_end = rope.byte_to_char((block_byte_start + content_bytes) as usize);
871 rope.remove(char_start..char_end);
872 }
873 if new_bytes > 0 {
874 rope.insert(char_start, new_text);
875 }
876 }
877
878 if delta != 0 {
879 // Shift entries that sit strictly past this block's start
880 // (i.e. the trailing boundary and everything after).
881 store
882 .block_offsets
883 .write()
884 .unwrap()
885 .shift_after(block_byte_start + 1, delta);
886 }
887}
888
889/// Delete bytes `[byte_start_in_block..byte_end_in_block)` from inside
890/// the block identified by `block_id`. Shifts subsequent block offsets
891/// by the deleted byte length. No-op for blocks not in the index.
892pub fn rope_delete_in_block(
893 store: &Store,
894 block_id: EntityId,
895 byte_start_in_block: u32,
896 byte_end_in_block: u32,
897) {
898 if byte_end_in_block <= byte_start_in_block {
899 return;
900 }
901 let deleted_bytes = byte_end_in_block - byte_start_in_block;
902 let block_byte_start = {
903 let offsets = store.block_offsets.read().unwrap();
904 let Some((start, _end)) = offsets.range_of_block(block_id) else {
905 return;
906 };
907 start
908 };
909 let rope_byte_start = block_byte_start + byte_start_in_block;
910 let rope_byte_end = block_byte_start + byte_end_in_block;
911 {
912 let mut rope = store.rope.write().unwrap();
913 let char_start = rope.byte_to_char(rope_byte_start as usize);
914 let char_end = rope.byte_to_char(rope_byte_end as usize);
915 rope.remove(char_start..char_end);
916 }
917 store
918 .block_offsets
919 .write()
920 .unwrap()
921 .shift_after(block_byte_start + 1, -(deleted_bytes as i32));
922}
923
924/// Recompute `Frame.byte_range` for every frame in `store.frames`
925/// based on current `block_offsets` and the frame tree structure.
926/// Plan §1.6 invariant: each frame's byte_range is the (min_start,
927/// max_end) over all its descendant blocks, sub-frames, and table
928/// anchors+cells.
929///
930/// Call this after any mutation that affects rope byte positions.
931/// O(F + B) where F = frames, B = blocks in the document.
932pub fn recompute_all_frame_byte_ranges(store: &Store) {
933 let frame_ids: Vec<EntityId> = {
934 let frames = store.frames.read().unwrap();
935 frames.keys().copied().collect()
936 };
937 for fid in frame_ids {
938 let new_range = compute_frame_byte_range_recursive(store, fid);
939 let mut frames = store.frames.write().unwrap();
940 if let Some(f) = frames.get(&fid).cloned()
941 && f.byte_range != new_range
942 {
943 let mut updated = f;
944 updated.byte_range = new_range;
945 frames.insert(fid, updated);
946 }
947 }
948}
949
950fn compute_frame_byte_range_recursive(store: &Store, frame_id: EntityId) -> (u32, u32) {
951 let mut bounds: Option<(u32, u32)> = None;
952 walk_frame_bounds(store, frame_id, &mut bounds);
953 bounds.unwrap_or((0, 0))
954}
955
956fn walk_frame_bounds(store: &Store, frame_id: EntityId, bounds: &mut Option<(u32, u32)>) {
957 fn merge(bounds: &mut Option<(u32, u32)>, s: u32, e: u32) {
958 *bounds = Some(match *bounds {
959 None => (s, e),
960 Some((min, max)) => (min.min(s), max.max(e)),
961 });
962 }
963
964 let (blocks, child_order, table_id) = {
965 let frames = store.frames.read().unwrap();
966 let Some(f) = frames.get(&frame_id) else {
967 return;
968 };
969 (f.blocks.clone(), f.child_order.clone(), f.table)
970 };
971
972 {
973 let offsets = store.block_offsets.read().unwrap();
974 for bid in &blocks {
975 if let Some((s, e)) = offsets.range_of_block(*bid) {
976 merge(bounds, s, e);
977 }
978 }
979 if let Some(tid) = table_id
980 && let Some((s, e)) = offsets.range_of(OffsetMarker::TableAnchor(tid))
981 {
982 merge(bounds, s, e);
983 }
984 }
985
986 for entry in &child_order {
987 if *entry < 0 {
988 walk_frame_bounds(store, (-*entry) as EntityId, bounds);
989 }
990 }
991
992 if let Some(tid) = table_id {
993 let cell_ids: Vec<EntityId> = {
994 let tables = store.tables.read().unwrap();
995 tables
996 .get(&tid)
997 .map(|t| t.cells.clone())
998 .unwrap_or_default()
999 };
1000 for cell_id in &cell_ids {
1001 let cell_frame_id = {
1002 let cells = store.table_cells.read().unwrap();
1003 cells.get(cell_id).and_then(|c| c.cell_frame)
1004 };
1005 if let Some(cfid) = cell_frame_id {
1006 walk_frame_bounds(store, cfid, bounds);
1007 }
1008 }
1009 }
1010}