Skip to main content

common/database/
rope_helpers.rs

1//! Helpers for writing the global character rope from use cases.
2//!
3//! Each helper mutates `store.rope` and `store.block_offsets`
4//! together so callers can stay oblivious to the underlying layout.
5//! Read helpers (`block_content_via_store`) source content from the
6//! rope and return empty when a block is not yet registered in the
7//! offset index.
8
9use crate::database::Store;
10use crate::database::block_offset_index::OffsetMarker;
11use crate::entities::Block;
12use crate::types::EntityId;
13
14/// Read a block's content from the global rope via `block_offsets`,
15/// stripping the trailing `\n` boundary that `range_of` includes for
16/// non-last entries. Returns an empty string if the block isn't
17/// registered in the offset index (e.g. a freshly-created block that
18/// hasn't been spliced into the rope yet — `setup_with_text` test
19/// docs use this path).
20pub fn block_content_via_store(block: &Block, store: &Store) -> String {
21    let offsets = store.block_offsets.read().unwrap();
22    let marker = OffsetMarker::Block(block.id);
23    let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
24        return String::new();
25    };
26    // Drop the trailing inter-block boundary `\n` ONLY when this block
27    // has a successor entry — that one byte is the boundary `\n` between
28    // this block and the next. The last entry has no trailing boundary;
29    // any final `\n` is real content.
30    let content_end = if has_successor && be > bs { be - 1 } else { be };
31    drop(offsets);
32    let rope = store.rope.read().unwrap();
33    rope.byte_slice(bs as usize..content_end as usize)
34        .to_string()
35}
36
37/// Logical character count of a block — what the old
38/// `Block.text_length` field used to cache. Image anchors are stored as
39/// `\u{FFFC}` (one char, three bytes) inside the rope content, so the
40/// char count already covers them. Returns 0 for blocks not registered
41/// in the offset index.
42///
43/// O(log n) via `ropey::Rope::byte_to_char` — does NOT materialize the
44/// block's text into a String. Replaces the prior O(L) implementation
45/// that counted chars by walking UTF-8 over a cloned slice.
46pub fn block_char_length(block: &Block, store: &Store) -> i64 {
47    let offsets = store.block_offsets.read().unwrap();
48    let marker = OffsetMarker::Block(block.id);
49    let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
50        return 0;
51    };
52    let content_end_bytes = if has_successor && be > bs { be - 1 } else { be };
53    drop(offsets);
54    let rope = store.rope.read().unwrap();
55    let char_start = rope.byte_to_char(bs as usize);
56    let char_end = rope.byte_to_char(content_end_bytes as usize);
57    (char_end - char_start) as i64
58}
59
60/// Return the absolute character position of a block's start in the
61/// document, derived from the rope via `BlockOffsetIndex`.
62///
63/// O(log n): one `range_of_block` lookup + one `byte_to_char` conversion.
64/// Falls back to `block.document_position` for blocks not registered in
65/// the index — currently table-cell blocks (plan §1.6 deferred) and
66/// blocks under non-top-level frames (`insert_frame_uc` only mirrors
67/// top-level frames). The stored field is set authoritatively by those
68/// non-flow paths and stays correct for them across main-flow edits.
69pub fn block_document_position(block: &Block, store: &Store) -> i64 {
70    let offsets = store.block_offsets.read().unwrap();
71    let Some((byte_start, _)) = offsets.range_of_block(block.id) else {
72        return block.document_position;
73    };
74    drop(offsets);
75    let rope = store.rope.read().unwrap();
76    rope.byte_to_char(byte_start as usize) as i64
77}
78
79/// Whether the rope's char-position space matches the user-visible
80/// flow positions that `Block.document_position` is computed against.
81///
82/// They match when every block is mirrored to the rope as a contiguous
83/// run. They DON'T match when:
84/// 1. The document contains tables — cell content sits at separate rope
85///    byte ranges (plan §1.6), so the rope is missing the cells'
86///    flow-position contribution.
87/// 2. The document contains sub-frames whose blocks aren't mirrored —
88///    `insert_frame_uc` only mirrors top-level frames.
89///
90/// When this returns `true`, readers can derive `document_position`
91/// directly from the rope (O(log n)) and the use-case-side
92/// position-refresh loops can be skipped. When it returns `false`,
93/// readers must consult the maintained `Block.document_position`
94/// stored field, and the loops are required to keep it correct.
95pub fn rope_positions_match_flow(store: &Store) -> bool {
96    let offsets = store.block_offsets.read().unwrap();
97    if offsets.table_anchor_count() > 0 {
98        return false;
99    }
100    // With no table anchors, every entry is a Block marker, so
101    // `entries.len()` equals the indexed block count.
102    let indexed_block_count = offsets.entries.len();
103    drop(offsets);
104    let total_block_count = store.blocks.read().unwrap().len();
105    indexed_block_count == total_block_count
106}
107
108/// Locate which block contains a given absolute char position in the
109/// document, returning `(block_id, char_offset_in_block, block_char_start)`
110/// in O(log n) using the rope + `BlockOffsetIndex` instead of an O(N)
111/// linear walk of all blocks.
112///
113/// Replaces the per-keystroke hot path in editing use cases
114/// (`find_block_at_position_sequential`) which fetched every block
115/// + called `block_char_length` per block. For an N-block document
116///   each editor keystroke now costs O(log n) lookups instead of O(N).
117///
118/// Returns `None` for documents containing tables — table cell content
119/// lives in separate byte ranges later in the rope (plan §1.6), so the
120/// rope's char-position space diverges from the user-visible flow
121/// order. Callers must fall back to the slow per-block walk in that
122/// case.
123///
124/// `position` past the document end clamps to the last block's
125/// end-of-content.
126pub fn find_block_at_char_position(store: &Store, position: i64) -> Option<(EntityId, i64, i64)> {
127    // Fast path is only valid when EVERY block in the document is
128    // mirrored to the rope. Disqualifying cases:
129    //
130    // 1. Documents containing tables — cell content sits at separate
131    //    rope byte ranges (plan §1.6) so byte→block lookup finds the
132    //    wrong block for cursor positions inside cells.
133    //
134    // 2. Documents containing sub-frames inserted with a parent —
135    //    `insert_frame_uc` currently only mirrors top-level frames to
136    //    the rope (parent=None case), leaving sub-frame blocks
137    //    unregistered in the offset index. Detect by comparing block
138    //    counts: if the rope index has fewer Block markers than the
139    //    store has Block entities, some are unmirrored.
140    let offsets = store.block_offsets.read().unwrap();
141    if offsets.table_anchor_count() > 0 {
142        return None;
143    }
144    // No table anchors → every entry is a Block marker, so the indexed
145    // block count is simply `entries.len()`. O(1) instead of an O(N)
146    // filter-count on every keystroke.
147    let indexed_block_count = offsets.entries.len();
148    let total_block_count = store.blocks.read().unwrap().len();
149    if indexed_block_count != total_block_count {
150        return None;
151    }
152    drop(offsets);
153
154    let rope = store.rope.read().unwrap();
155    let total_chars = rope.len_chars() as i64;
156    let pos_clamped = position.clamp(0, total_chars);
157    let abs_byte = rope.char_to_byte(pos_clamped as usize);
158    drop(rope);
159
160    let offsets = store.block_offsets.read().unwrap();
161    let block_id = offsets.marker_at_byte(abs_byte as u32)?.as_block()?;
162    let (bs, be, has_successor) = offsets.range_with_successor(OffsetMarker::Block(block_id))?;
163    let content_end = if has_successor && be > bs { be - 1 } else { be };
164
165    drop(offsets);
166
167    let rope = store.rope.read().unwrap();
168    let block_char_start = rope.byte_to_char(bs as usize) as i64;
169    // Clamp the cursor's byte to the block's content area (not into the
170    // trailing `\n` boundary if any).
171    let byte_for_char = std::cmp::min(abs_byte, content_end as usize);
172    let abs_char = rope.byte_to_char(byte_for_char) as i64;
173    let char_in_block = abs_char - block_char_start;
174
175    // NOTE: separator semantics differ across use cases. Callers like
176    // `get_block_at_position_uc` interpret "position at the end of a
177    // non-empty block with a successor" as "belongs to the next
178    // block"; callers like `insert_text_uc` want the previous block
179    // with offset == block_char_len. This helper returns the
180    // previous-block answer (the simpler semantic); use-case-specific
181    // advance logic lives at the call site.
182    let _ = has_successor;
183
184    Some((block_id, char_in_block, block_char_start))
185}
186
187/// Convert an in-block char offset to an in-block byte offset using the
188/// rope's index. Both inputs and outputs are relative to the start of
189/// the block's content (NOT absolute rope positions). The char offset
190/// is clamped to the block's logical length so callers don't need to
191/// pre-validate.
192///
193/// O(log n) via `ropey::Rope::char_to_byte` — replaces the O(L)
194/// "materialize block text + walk char_indices" pattern that
195/// `set_text_format_uc` / `merge_text_format_uc` used. Returns
196/// `(byte_offset_in_block, content_byte_len)` so callers can also
197/// pass the content length to `debug_assert_well_formed` without
198/// materializing the text.
199///
200/// Returns `(0, 0)` for blocks not registered in the offset index.
201pub fn block_char_to_byte_in_block(
202    store: &Store,
203    block_id: EntityId,
204    char_offset: usize,
205) -> (u32, usize) {
206    let offsets = store.block_offsets.read().unwrap();
207    let marker = OffsetMarker::Block(block_id);
208    let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
209        return (0, 0);
210    };
211    let content_end_bytes = if has_successor && be > bs { be - 1 } else { be };
212    let content_byte_len = (content_end_bytes - bs) as usize;
213    drop(offsets);
214
215    let rope = store.rope.read().unwrap();
216    let block_char_start = rope.byte_to_char(bs as usize);
217    let block_char_end = rope.byte_to_char(content_end_bytes as usize);
218    let block_char_len = block_char_end - block_char_start;
219
220    // Clamp char_offset to block's char length.
221    let clamped = std::cmp::min(char_offset, block_char_len);
222    let abs_byte = rope.char_to_byte(block_char_start + clamped);
223    let byte_in_block = (abs_byte - bs as usize) as u32;
224    (byte_in_block, content_byte_len)
225}
226
227/// Fast-path full-document plain text: returns `Some(rope.to_string())`
228/// iff the document is in the canonical flat layout — no table anchors
229/// in the offset index, single top-level frame. In that case the
230/// rope's byte order is the same as the document-flow order, so one
231/// `to_string()` allocation replaces the O(N) per-block walk +
232/// per-block `Cow<str>` materialization that `build_full_text` /
233/// `export_plain_text` would otherwise do.
234///
235/// Returns `None` for documents containing tables or nested frames —
236/// those require the per-frame, per-block traversal because table
237/// cell content lives in separate byte ranges later in the rope
238/// (plan §1.6), not interleaved with the parent frame's bytes.
239///
240/// `top_frame_count` is the caller-known number of top-level frames
241/// (typically obtained from `Document.frames.len()`). Callers
242/// already have this value and pass it in to avoid a redundant uow
243/// query.
244pub fn rope_flat_text_if_simple(store: &Store, top_frame_count: usize) -> Option<String> {
245    if top_frame_count != 1 {
246        return None;
247    }
248    let offsets = store.block_offsets.read().unwrap();
249    let has_table = offsets
250        .entries
251        .iter()
252        .any(|(m, _)| matches!(m, OffsetMarker::TableAnchor(_)));
253    if has_table {
254        return None;
255    }
256    drop(offsets);
257    Some(store.rope.read().unwrap().to_string())
258}
259
260/// Reset the rope to empty and clear `block_offsets`. Called by
261/// importers when they replace the entire document content.
262pub fn rope_reset(store: &Store) {
263    *store.rope.write().unwrap() = ropey::Rope::new();
264    *store.block_offsets.write().unwrap() =
265        crate::database::block_offset_index::BlockOffsetIndex::new();
266}
267
268/// Append `text` to the end of the rope and register `block_id` at
269/// the byte position where the text starts. Returns that byte offset.
270///
271/// Callers are responsible for inserting an inter-block `\n`
272/// (`rope_insert_block_boundary`) before each block AFTER the first
273/// in a contiguous frame.
274pub fn rope_append_block(store: &Store, block_id: EntityId, text: &str) -> u32 {
275    let mut rope = store.rope.write().unwrap();
276    let byte_start = rope.len_bytes() as u32;
277    let char_end = rope.len_chars();
278    rope.insert(char_end, text);
279    let new_total = rope.len_bytes() as u32;
280    drop(rope);
281
282    let mut offsets = store.block_offsets.write().unwrap();
283    offsets.push_block(block_id, byte_start);
284    offsets.set_total_bytes(new_total);
285    byte_start
286}
287
288/// Insert `text` as a new block at `byte_pos` in the rope, prepending
289/// a `\n` boundary. Used by `insert_table_uc` to place cell blocks at
290/// the end of their containing top-level frame's range (plan §1.6),
291/// rather than always at rope end.
292///
293/// Total bytes inserted: `1 + text.len()`. The block's content
294/// occupies `[byte_pos + 1, byte_pos + 1 + text.len())`. The block
295/// entry is registered at `byte_pos + 1` in `block_offsets`.
296///
297/// Existing entries with `byte_start == byte_pos` (e.g. a previous
298/// empty block whose end coincides with this insertion point) are
299/// kept BEFORE the new entry in the Vec, since the inserted `\n`
300/// boundary belongs after them. Entries strictly past `byte_pos`
301/// shift forward by `(1 + text.len())` bytes.
302///
303/// When `byte_pos == total_bytes`, behaves like
304/// `rope_insert_block_boundary` followed by `rope_append_block`.
305pub fn rope_insert_block_at(store: &Store, byte_pos: u32, block_id: EntityId, text: &str) {
306    let delta = (1 + text.len()) as i32;
307    // Vec position: insert AFTER any entry at byte_pos itself
308    // (those represent earlier empty blocks whose `\n` boundary
309    // we are placing now). Only entries strictly past byte_pos
310    // come after our new entry in the Vec.
311    let new_entry_vec_pos = {
312        let offsets = store.block_offsets.read().unwrap();
313        offsets
314            .entries
315            .iter()
316            .position(|(_, bs)| *bs > byte_pos)
317            .unwrap_or(offsets.entries.len())
318    };
319    {
320        let mut rope = store.rope.write().unwrap();
321        let char_idx = rope.byte_to_char(byte_pos as usize);
322        let mut combined = String::with_capacity(1 + text.len());
323        combined.push('\n');
324        combined.push_str(text);
325        rope.insert(char_idx, &combined);
326    }
327    let mut offsets = store.block_offsets.write().unwrap();
328    // Shift entries strictly past byte_pos. Entries AT byte_pos
329    // (the prior empty block) stay where they are — the new `\n`
330    // is conceptually "after" them.
331    offsets.shift_after(byte_pos + 1, delta);
332    offsets.insert_at(
333        new_entry_vec_pos,
334        OffsetMarker::Block(block_id),
335        byte_pos + 1,
336    );
337}
338
339/// Walks up `frame.parent_frame` to find the top-level ancestor of
340/// the given frame, then returns the end byte of that top-level
341/// frame's current rope range — i.e. the byte position where blocks
342/// belonging to that frame's subtree (e.g. table cells per plan §1.6)
343/// should be inserted so they land BEFORE any following top-level
344/// frame's content.
345///
346/// Reads `block_offsets`/`frames`/`tables`/`table_cells` directly, so
347/// the result is fresh even when `Frame.byte_range` has not yet been
348/// recomputed at commit time.
349pub fn top_level_frame_end_byte(store: &Store, frame_id: EntityId) -> u32 {
350    let top_id = {
351        let frames = store.frames.read().unwrap();
352        let mut current = frame_id;
353        loop {
354            let Some(f) = frames.get(&current) else {
355                return 0;
356            };
357            match f.parent_frame {
358                None => break current,
359                Some(p) => current = p,
360            }
361        }
362    };
363    let (_min, max) = compute_frame_byte_range_recursive(store, top_id);
364    max
365}
366
367/// Append a new empty block to the end of the rope, separating it
368/// from any prior content with a `\n` boundary (only if the rope is
369/// already non-empty). Registers `block_id` at the resulting byte
370/// position. Returns that byte position. Used when `insert_frame_uc`
371/// creates a new top-level frame with a single empty block.
372pub fn rope_append_empty_block(store: &Store, block_id: EntityId) -> u32 {
373    let was_empty = store.rope.read().unwrap().len_bytes() == 0;
374    if !was_empty {
375        rope_insert_block_boundary(store);
376    }
377    let pos = store.rope.read().unwrap().len_bytes() as u32;
378    let mut offsets = store.block_offsets.write().unwrap();
379    offsets.push_block(block_id, pos);
380    offsets.set_total_bytes(pos);
381    pos
382}
383
384/// Append a single `\n` inter-block boundary character to the end of
385/// the rope. Does NOT register a block — this is the sentinel between
386/// two adjacent blocks within the same frame (plan §1.4).
387pub fn rope_insert_block_boundary(store: &Store) {
388    let mut rope = store.rope.write().unwrap();
389    let char_end = rope.len_chars();
390    rope.insert(char_end, "\n");
391    let new_total = rope.len_bytes() as u32;
392    drop(rope);
393
394    store
395        .block_offsets
396        .write()
397        .unwrap()
398        .set_total_bytes(new_total);
399}
400
401/// Insert `text` at `byte_offset_in_block` inside the block identified
402/// by `block_id`. Looks up the block's start in the rope via
403/// `block_offsets.range_of()`, splices into the rope, and shifts
404/// subsequent block offsets by the inserted byte length.
405///
406/// Silently no-ops if the block is not registered in the offset index
407/// (this can happen for blocks whose content lives outside the global
408/// rope, e.g. table cells until step 5.5).
409pub fn rope_insert_in_block(
410    store: &Store,
411    block_id: EntityId,
412    byte_offset_in_block: u32,
413    text: &str,
414) {
415    let inserted_bytes = text.len() as u32;
416    if inserted_bytes == 0 {
417        return;
418    }
419    let block_byte_start = {
420        let offsets = store.block_offsets.read().unwrap();
421        let Some((start, _end)) = offsets.range_of_block(block_id) else {
422            return;
423        };
424        start
425    };
426    let rope_byte = block_byte_start + byte_offset_in_block;
427    {
428        let mut rope = store.rope.write().unwrap();
429        let char_idx = rope.byte_to_char(rope_byte as usize);
430        rope.insert(char_idx, text);
431    }
432    // Shift entries past this block by inserted_bytes. Threshold
433    // is one byte past block_byte_start so the current block's own
434    // entry isn't moved.
435    store
436        .block_offsets
437        .write()
438        .unwrap()
439        .shift_after(block_byte_start + 1, inserted_bytes as i32);
440}
441
442/// Split an existing block in the rope at `byte_offset_in_block`:
443/// - inserts a `\n` inter-block boundary at the absolute byte position
444///   `block_start + byte_offset_in_block` in the rope
445/// - shifts entries past that position by +1 byte
446/// - inserts a new entry for `new_block_id` at
447///   `block_start + byte_offset_in_block + 1` (right after the newline),
448///   placed immediately after the original block in the entries Vec
449///
450/// `byte_offset_in_block` may be 0 (split before first char of block,
451/// i.e. insert empty block before this one) or equal to the block's
452/// byte length (split after last char, i.e. insert empty block after).
453pub fn rope_split_block(
454    store: &Store,
455    current_block_id: EntityId,
456    byte_offset_in_block: u32,
457    new_block_id: EntityId,
458) {
459    let current_marker = OffsetMarker::Block(current_block_id);
460    let (block_start, current_idx) = {
461        let offsets = store.block_offsets.read().unwrap();
462        let Some((start, _end)) = offsets.range_of(current_marker) else {
463            return;
464        };
465        let idx = offsets
466            .entries
467            .iter()
468            .position(|(m, _)| *m == current_marker)
469            .unwrap();
470        (start, idx)
471    };
472    let split_byte = block_start + byte_offset_in_block;
473
474    // 1. Insert the `\n` boundary at the split point.
475    {
476        let mut rope = store.rope.write().unwrap();
477        let char_idx = rope.byte_to_char(split_byte as usize);
478        rope.insert(char_idx, "\n");
479    }
480
481    // 2. Shift entries past the split (and total_bytes) by +1.
482    //    Threshold > split_byte so the new entry we insert next
483    //    isn't double-shifted.
484    store
485        .block_offsets
486        .write()
487        .unwrap()
488        .shift_after(split_byte + 1, 1);
489
490    // 3. Register the new block at `split_byte + 1`, immediately
491    //    after the original in the entries Vec.
492    store.block_offsets.write().unwrap().insert_at(
493        current_idx + 1,
494        OffsetMarker::Block(new_block_id),
495        split_byte + 1,
496    );
497}
498
499/// Merge `start_block` and `end_block` by deleting the rope range
500/// `[start_block.start + byte_so .. end_block.start + byte_eo)` — i.e.
501/// the suffix of `start_block`, every block between (and their
502/// boundary newlines), and the prefix of `end_block`. Removes the
503/// index entries for every block strictly between `start_block` and
504/// `end_block` (inclusive of `end_block` itself); the surviving
505/// content lives in `start_block`. Shifts any blocks past `end_block`
506/// by the negative delta.
507///
508/// No-op if `start_block` is not in the index. Skipped for any
509/// intermediate block id whose range is missing from the index (e.g.
510/// table cells until step 5.5e).
511pub fn rope_merge_block_range(
512    store: &Store,
513    start_block_id: EntityId,
514    byte_so_in_start: u32,
515    end_block_id: EntityId,
516    byte_eo_in_end: u32,
517) {
518    let start_marker = OffsetMarker::Block(start_block_id);
519    let end_marker = OffsetMarker::Block(end_block_id);
520    let (start_block_byte, end_block_byte, start_idx, end_idx) = {
521        let offsets = store.block_offsets.read().unwrap();
522        let Some((sb, _)) = offsets.range_of(start_marker) else {
523            return;
524        };
525        let Some((eb, _)) = offsets.range_of(end_marker) else {
526            return;
527        };
528        let si = offsets
529            .entries
530            .iter()
531            .position(|(m, _)| *m == start_marker)
532            .unwrap();
533        let ei = offsets
534            .entries
535            .iter()
536            .position(|(m, _)| *m == end_marker)
537            .unwrap();
538        (sb, eb, si, ei)
539    };
540    if end_idx <= start_idx {
541        return;
542    }
543
544    let delete_start = start_block_byte + byte_so_in_start;
545    let delete_end = end_block_byte + byte_eo_in_end;
546    if delete_end <= delete_start {
547        return;
548    }
549    let deleted_bytes = delete_end - delete_start;
550
551    // 1. Remove the rope range.
552    {
553        let mut rope = store.rope.write().unwrap();
554        let char_start = rope.byte_to_char(delete_start as usize);
555        let char_end = rope.byte_to_char(delete_end as usize);
556        rope.remove(char_start..char_end);
557    }
558
559    // 2. Remove block_offsets entries for [start_idx+1 ..= end_idx].
560    {
561        let mut offsets = store.block_offsets.write().unwrap();
562        offsets.drain_inclusive(start_idx + 1, end_idx);
563    }
564
565    // 3. Shift any remaining entries past the deletion by -deleted_bytes.
566    //    Threshold > delete_start because start_block's own entry
567    //    sits at delete_start - byte_so_in_start (≤ delete_start)
568    //    and must not move.
569    store
570        .block_offsets
571        .write()
572        .unwrap()
573        .shift_after(delete_start + 1, -(deleted_bytes as i32));
574}
575
576/// Insert a U+FFFC OBJECT REPLACEMENT CHARACTER sentinel in the rope
577/// at the table-anchor position, registering a `TableAnchor(table_id)`
578/// marker in the offset index (plan §1.6).
579///
580/// `target_block_id` is the block in the parent frame that the table
581/// is adjacent to. `after` controls whether the table goes BEFORE
582/// (`after = false`) or AFTER the target block.
583///
584/// The 3-byte sentinel is paired with an inter-marker `\n`:
585/// - `after = false`: inserts `\u{FFFC}\n` at `target.byte_start`
586/// - `after = true`, target is NOT the last entry: inserts
587///   `\u{FFFC}\n` at `target.byte_end` (between target's trailing
588///   `\n` and the next entry)
589/// - `after = true`, target IS the last entry: inserts `\n\u{FFFC}`
590///   at `target.byte_end` (rope now ends with the sentinel)
591///
592/// NOTE: cell-internal content is not yet routed through the rope —
593/// the rope reflects table *presence* (3-byte sentinel) only.
594/// Routing cell content is deferred to plan §1.6's `Frame.byte_range`
595/// model.
596///
597/// No-op if `target_block_id` is not in the index.
598pub fn rope_insert_table_anchor(
599    store: &Store,
600    table_id: EntityId,
601    target_block_id: EntityId,
602    after: bool,
603) {
604    const SENTINEL: &str = "\u{FFFC}"; // 3 bytes
605    const SENTINEL_BYTES: u32 = 3;
606
607    let (insert_pos, target_idx, target_is_last) = {
608        let offsets = store.block_offsets.read().unwrap();
609        let target_marker = OffsetMarker::Block(target_block_id);
610        let Some((start, end)) = offsets.range_of(target_marker) else {
611            return;
612        };
613        let idx = offsets
614            .entries
615            .iter()
616            .position(|(m, _)| *m == target_marker)
617            .unwrap();
618        let is_last = idx + 1 == offsets.entries.len();
619        let pos = if after { end } else { start };
620        (pos, idx, is_last)
621    };
622
623    // Insertion strategy:
624    let (rope_inserted, marker_byte_start, new_entry_pos, shift_threshold, shift_delta) = if !after
625    {
626        // Before target: "\u{FFFC}\n" at target.byte_start
627        ("\u{FFFC}\n", insert_pos, target_idx, insert_pos, 4i32)
628    } else if !target_is_last {
629        // After target, with following entries: "\u{FFFC}\n"
630        // at target.byte_end. The TableAnchor sits where the
631        // next block USED to start; that following entry
632        // shifts by 4.
633        ("\u{FFFC}\n", insert_pos, target_idx + 1, insert_pos, 4i32)
634    } else {
635        // After target which is last: "\n\u{FFFC}" appended.
636        // TableAnchor's byte_start sits 1 past the original
637        // total (after the new `\n`).
638        (
639            "\n\u{FFFC}",
640            insert_pos + 1,
641            target_idx + 1,
642            insert_pos,
643            4i32,
644        )
645    };
646
647    // 1. Splice the literal bytes into the rope.
648    {
649        let mut rope = store.rope.write().unwrap();
650        let char_idx = rope.byte_to_char(insert_pos as usize);
651        rope.insert(char_idx, rope_inserted);
652    }
653
654    // 2. Shift entries past the insertion point. Use shift_after
655    //    BEFORE inserting our new entry so we don't double-shift.
656    store
657        .block_offsets
658        .write()
659        .unwrap()
660        .shift_after(shift_threshold, shift_delta);
661
662    // 3. Register the TableAnchor at the resolved byte position
663    //    and the resolved Vec position.
664    store.block_offsets.write().unwrap().insert_at(
665        new_entry_pos,
666        OffsetMarker::TableAnchor(table_id),
667        marker_byte_start,
668    );
669
670    // Note: SENTINEL_BYTES is part of `shift_delta` (3 for the
671    // sentinel + 1 for the `\n`).
672    let _ = SENTINEL;
673    let _ = SENTINEL_BYTES;
674}
675
676/// Append a U+FFFC table-anchor sentinel at the end of the rope and
677/// register a `TableAnchor(table_id)` marker. If the rope is already
678/// non-empty, prepends a `\n` boundary so the sentinel doesn't run
679/// into the previous entry's content.
680///
681/// Used by import paths (`import_html_uc`, `import_markdown_uc`)
682/// that process the document linearly and append entities as they
683/// encounter them, rather than inserting relative to an existing
684/// target block.
685pub fn rope_append_table_anchor(store: &Store, table_id: EntityId) {
686    let (anchor_byte_start, new_total) = {
687        let mut rope = store.rope.write().unwrap();
688        let was_empty = rope.len_bytes() == 0;
689        let char_end = rope.len_chars();
690        let to_insert = if was_empty { "\u{FFFC}" } else { "\n\u{FFFC}" };
691        rope.insert(char_end, to_insert);
692        let new_total = rope.len_bytes() as u32;
693        // Sentinel is 3 bytes; if a `\n` was prepended that's 1 byte
694        // before the sentinel.
695        let anchor_byte_start = new_total - 3;
696        (anchor_byte_start, new_total)
697    };
698
699    let mut offsets = store.block_offsets.write().unwrap();
700    offsets.push(OffsetMarker::TableAnchor(table_id), anchor_byte_start);
701    offsets.set_total_bytes(new_total);
702}
703
704/// Remove a TableAnchor sentinel from the rope, undoing the effect
705/// of `rope_insert_table_anchor`. Looks up the anchor's byte range
706/// (always 3 bytes for the U+FFFC plus 1 byte of inter-marker `\n`
707/// either before or after, depending on what's adjacent), removes
708/// those 4 bytes from the rope, drops the entry, shifts trailing
709/// entries by -4.
710///
711/// No-op if no TableAnchor for `table_id` exists.
712pub fn rope_remove_table_anchor(store: &Store, table_id: EntityId) {
713    let anchor_marker = OffsetMarker::TableAnchor(table_id);
714    let (anchor_byte_start, anchor_idx, anchor_is_last, has_predecessor) = {
715        let offsets = store.block_offsets.read().unwrap();
716        let Some((start, _end)) = offsets.range_of(anchor_marker) else {
717            return;
718        };
719        let idx = offsets
720            .entries
721            .iter()
722            .position(|(m, _)| *m == anchor_marker)
723            .unwrap();
724        let is_last = idx + 1 == offsets.entries.len();
725        let has_pred = idx > 0;
726        (start, idx, is_last, has_pred)
727    };
728
729    // Symmetric to insert_table_anchor. The 4 bytes to remove are:
730    // - if anchor is last: [byte_start - 1 .. byte_start + 3) — the
731    //   preceding `\n` + the 3-byte sentinel
732    // - otherwise: [byte_start .. byte_start + 4) — the sentinel
733    //   + the following `\n`
734    let (remove_start, remove_end) = if anchor_is_last && has_predecessor {
735        (anchor_byte_start - 1, anchor_byte_start + 3)
736    } else {
737        (anchor_byte_start, anchor_byte_start + 4)
738    };
739
740    {
741        let mut rope = store.rope.write().unwrap();
742        let char_start = rope.byte_to_char(remove_start as usize);
743        let char_end = rope.byte_to_char(remove_end as usize);
744        rope.remove(char_start..char_end);
745    }
746    {
747        let mut offsets = store.block_offsets.write().unwrap();
748        offsets.remove_at(anchor_idx);
749    }
750    store
751        .block_offsets
752        .write()
753        .unwrap()
754        .shift_after(remove_start, -4);
755}
756
757/// Remove a registered block from the rope: drops its content bytes
758/// plus one boundary `\n` (the one after, if the block has a
759/// successor; the one before, if it's the last entry), removes the
760/// entry from the index, and shifts trailing entries by the negative
761/// byte delta.
762///
763/// No-op if `block_id` is not in the index. No-op for the special
764/// case of a single-block document being asked to remove its sole
765/// block (we'd produce an empty rope but the block itself is being
766/// cascaded by the caller).
767pub fn rope_remove_block(store: &Store, block_id: EntityId) {
768    let block_marker = OffsetMarker::Block(block_id);
769    let (block_start, block_end, idx, is_last, has_pred) = {
770        let offsets = store.block_offsets.read().unwrap();
771        let Some((start, end)) = offsets.range_of(block_marker) else {
772            return;
773        };
774        let idx = offsets
775            .entries
776            .iter()
777            .position(|(m, _)| *m == block_marker)
778            .unwrap();
779        let is_last = idx + 1 == offsets.entries.len();
780        let has_pred = idx > 0;
781        (start, end, idx, is_last, has_pred)
782    };
783
784    // Determine the byte range to delete:
785    // - if there's a successor: [block_start..block_end) — the
786    //   block's content INCLUDING its trailing boundary `\n`
787    //   (which is the byte at block_end - 1)
788    // - if last and has predecessor: [block_start - 1..block_end)
789    //   — also delete the LEADING boundary `\n` that the previous
790    //   entry placed before us
791    // - if last and no predecessor (sole entry): just delete
792    //   [block_start..block_end) (no boundary `\n` exists)
793    let (remove_start, remove_end) = if is_last && has_pred {
794        (block_start.saturating_sub(1), block_end)
795    } else {
796        (block_start, block_end)
797    };
798    if remove_end <= remove_start {
799        // Drop the entry only; nothing to remove from the rope.
800        store.block_offsets.write().unwrap().remove_at(idx);
801        return;
802    }
803    let deleted_bytes = remove_end - remove_start;
804
805    {
806        let mut rope = store.rope.write().unwrap();
807        let char_start = rope.byte_to_char(remove_start as usize);
808        let char_end = rope.byte_to_char(remove_end as usize);
809        rope.remove(char_start..char_end);
810    }
811    store.block_offsets.write().unwrap().remove_at(idx);
812    // Shift entries STRICTLY PAST the removed range. Using `remove_end` as
813    // the threshold (rather than `remove_start`) keeps an empty predecessor
814    // whose byte_start equals `remove_start` (the leading boundary `\n` we
815    // just deleted) in place — its content position is unchanged, only its
816    // trailing boundary is gone. `total_bytes` decreases by `deleted_bytes`
817    // regardless of threshold.
818    store
819        .block_offsets
820        .write()
821        .unwrap()
822        .shift_after(remove_end, -(deleted_bytes as i32));
823}
824
825/// Replace the entire content of a registered block in the rope with
826/// `new_text`. Preserves the block's `byte_start` and its trailing
827/// boundary `\n` (if any); subsequent entries shift by the net
828/// length delta.
829///
830/// Used by use cases that compute a block's final content as a string
831/// and want to push that content to the rope in one shot — e.g. the
832/// block-splitting branches of `insert_html_at_position_uc` and
833/// `insert_markdown_at_position_uc`, where each affected block
834/// (head, tail, mid-replacement) gets a single new value.
835///
836/// No-op if `block_id` is not in the index.
837pub fn rope_replace_block_content(store: &Store, block_id: EntityId, new_text: &str) {
838    let (block_byte_start, content_bytes) = {
839        let offsets = store.block_offsets.read().unwrap();
840        let Some((start, end)) = offsets.range_of_block(block_id) else {
841            return;
842        };
843        let total = offsets.total_bytes();
844        // `range_of` extends to the next entry's `byte_start` (or to
845        // `total_bytes`). If there's a following entry, the byte at
846        // `end - 1` is the inter-block boundary `\n` that belongs to
847        // the boundary between this block and the next, not to this
848        // block's content.
849        let has_trailing_boundary = end < total;
850        let content_bytes = if has_trailing_boundary {
851            end - start - 1
852        } else {
853            end - start
854        };
855        (start, content_bytes)
856    };
857
858    let new_bytes = new_text.len() as u32;
859    if content_bytes == 0 && new_bytes == 0 {
860        return;
861    }
862    let delta = new_bytes as i32 - content_bytes as i32;
863
864    // Splice [block_byte_start..block_byte_start + content_bytes)
865    // with `new_text`.
866    {
867        let mut rope = store.rope.write().unwrap();
868        let char_start = rope.byte_to_char(block_byte_start as usize);
869        if content_bytes > 0 {
870            let char_end = rope.byte_to_char((block_byte_start + content_bytes) as usize);
871            rope.remove(char_start..char_end);
872        }
873        if new_bytes > 0 {
874            rope.insert(char_start, new_text);
875        }
876    }
877
878    if delta != 0 {
879        // Shift entries that sit strictly past this block's start
880        // (i.e. the trailing boundary and everything after).
881        store
882            .block_offsets
883            .write()
884            .unwrap()
885            .shift_after(block_byte_start + 1, delta);
886    }
887}
888
889/// Delete bytes `[byte_start_in_block..byte_end_in_block)` from inside
890/// the block identified by `block_id`. Shifts subsequent block offsets
891/// by the deleted byte length. No-op for blocks not in the index.
892pub fn rope_delete_in_block(
893    store: &Store,
894    block_id: EntityId,
895    byte_start_in_block: u32,
896    byte_end_in_block: u32,
897) {
898    if byte_end_in_block <= byte_start_in_block {
899        return;
900    }
901    let deleted_bytes = byte_end_in_block - byte_start_in_block;
902    let block_byte_start = {
903        let offsets = store.block_offsets.read().unwrap();
904        let Some((start, _end)) = offsets.range_of_block(block_id) else {
905            return;
906        };
907        start
908    };
909    let rope_byte_start = block_byte_start + byte_start_in_block;
910    let rope_byte_end = block_byte_start + byte_end_in_block;
911    {
912        let mut rope = store.rope.write().unwrap();
913        let char_start = rope.byte_to_char(rope_byte_start as usize);
914        let char_end = rope.byte_to_char(rope_byte_end as usize);
915        rope.remove(char_start..char_end);
916    }
917    store
918        .block_offsets
919        .write()
920        .unwrap()
921        .shift_after(block_byte_start + 1, -(deleted_bytes as i32));
922}
923
924/// Recompute `Frame.byte_range` for every frame in `store.frames`
925/// based on current `block_offsets` and the frame tree structure.
926/// Plan §1.6 invariant: each frame's byte_range is the (min_start,
927/// max_end) over all its descendant blocks, sub-frames, and table
928/// anchors+cells.
929///
930/// Call this after any mutation that affects rope byte positions.
931/// O(F + B) where F = frames, B = blocks in the document.
932pub fn recompute_all_frame_byte_ranges(store: &Store) {
933    let frame_ids: Vec<EntityId> = {
934        let frames = store.frames.read().unwrap();
935        frames.keys().copied().collect()
936    };
937    for fid in frame_ids {
938        let new_range = compute_frame_byte_range_recursive(store, fid);
939        let mut frames = store.frames.write().unwrap();
940        if let Some(f) = frames.get(&fid).cloned()
941            && f.byte_range != new_range
942        {
943            let mut updated = f;
944            updated.byte_range = new_range;
945            frames.insert(fid, updated);
946        }
947    }
948}
949
950fn compute_frame_byte_range_recursive(store: &Store, frame_id: EntityId) -> (u32, u32) {
951    let mut bounds: Option<(u32, u32)> = None;
952    walk_frame_bounds(store, frame_id, &mut bounds);
953    bounds.unwrap_or((0, 0))
954}
955
956fn walk_frame_bounds(store: &Store, frame_id: EntityId, bounds: &mut Option<(u32, u32)>) {
957    fn merge(bounds: &mut Option<(u32, u32)>, s: u32, e: u32) {
958        *bounds = Some(match *bounds {
959            None => (s, e),
960            Some((min, max)) => (min.min(s), max.max(e)),
961        });
962    }
963
964    let (blocks, child_order, table_id) = {
965        let frames = store.frames.read().unwrap();
966        let Some(f) = frames.get(&frame_id) else {
967            return;
968        };
969        (f.blocks.clone(), f.child_order.clone(), f.table)
970    };
971
972    {
973        let offsets = store.block_offsets.read().unwrap();
974        for bid in &blocks {
975            if let Some((s, e)) = offsets.range_of_block(*bid) {
976                merge(bounds, s, e);
977            }
978        }
979        if let Some(tid) = table_id
980            && let Some((s, e)) = offsets.range_of(OffsetMarker::TableAnchor(tid))
981        {
982            merge(bounds, s, e);
983        }
984    }
985
986    for entry in &child_order {
987        if *entry < 0 {
988            walk_frame_bounds(store, (-*entry) as EntityId, bounds);
989        }
990    }
991
992    if let Some(tid) = table_id {
993        let cell_ids: Vec<EntityId> = {
994            let tables = store.tables.read().unwrap();
995            tables
996                .get(&tid)
997                .map(|t| t.cells.clone())
998                .unwrap_or_default()
999        };
1000        for cell_id in &cell_ids {
1001            let cell_frame_id = {
1002                let cells = store.table_cells.read().unwrap();
1003                cells.get(cell_id).and_then(|c| c.cell_frame)
1004            };
1005            if let Some(cfid) = cell_frame_id {
1006                walk_frame_bounds(store, cfid, bounds);
1007            }
1008        }
1009    }
1010}