Skip to main content

common/database/
rope_helpers.rs

1//! Helpers for writing the global character rope from use cases.
2//!
3//! Each helper mutates `store.rope` and `store.block_offsets`
4//! together so callers can stay oblivious to the underlying layout.
5//! Read helpers (`block_content_via_store`) prefer rope bytes but
6//! fall back to `Block.plain_text` when the rope is stale (e.g.
7//! after an unmirrored use case). The fallback goes away in step
8//! 7c when `Block.plain_text` is removed.
9
10use crate::database::Store;
11use crate::database::block_offset_index::OffsetMarker;
12use crate::entities::Block;
13use crate::types::EntityId;
14
15/// Read a block's content from the global rope via `block_offsets`,
16/// stripping the trailing `\n` boundary that `range_of` includes for
17/// non-last entries. Returns an empty string if the block isn't
18/// registered in the offset index (e.g. a freshly-created block that
19/// hasn't been spliced into the rope yet — `setup_with_text` test
20/// docs use this path).
21pub fn block_content_via_store(block: &Block, store: &Store) -> String {
22    let offsets = store.block_offsets.read().unwrap();
23    let marker = OffsetMarker::Block(block.id);
24    let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
25        return String::new();
26    };
27    // Drop the trailing inter-block boundary `\n` ONLY when this block
28    // has a successor entry — that one byte is the boundary `\n` between
29    // this block and the next. The last entry has no trailing boundary;
30    // any final `\n` is real content.
31    let content_end = if has_successor && be > bs { be - 1 } else { be };
32    drop(offsets);
33    let rope = store.rope.read().unwrap();
34    rope.byte_slice(bs as usize..content_end as usize)
35        .to_string()
36}
37
38/// Logical character count of a block — what the old
39/// `Block.text_length` field used to cache. Image anchors are stored as
40/// `\u{FFFC}` (one char, three bytes) inside the rope content, so the
41/// char count already covers them. Returns 0 for blocks not registered
42/// in the offset index.
43///
44/// O(log n) via `ropey::Rope::byte_to_char` — does NOT materialize the
45/// block's text into a String. Replaces the prior O(L) implementation
46/// that counted chars by walking UTF-8 over a cloned slice.
47pub fn block_char_length(block: &Block, store: &Store) -> i64 {
48    let offsets = store.block_offsets.read().unwrap();
49    let marker = OffsetMarker::Block(block.id);
50    let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
51        return 0;
52    };
53    let content_end_bytes = if has_successor && be > bs { be - 1 } else { be };
54    drop(offsets);
55    let rope = store.rope.read().unwrap();
56    let char_start = rope.byte_to_char(bs as usize);
57    let char_end = rope.byte_to_char(content_end_bytes as usize);
58    (char_end - char_start) as i64
59}
60
61/// Locate which block contains a given absolute char position in the
62/// document, returning `(block_id, char_offset_in_block, block_char_start)`
63/// in O(log n) using the rope + `BlockOffsetIndex` instead of an O(N)
64/// linear walk of all blocks.
65///
66/// Replaces the per-keystroke hot path in editing use cases
67/// (`find_block_at_position_sequential`) which fetched every block
68/// + called `block_char_length` per block. For an N-block document
69///   each editor keystroke now costs O(log n) lookups instead of O(N).
70///
71/// Returns `None` for documents containing tables — table cell content
72/// lives in separate byte ranges later in the rope (plan §1.6), so the
73/// rope's char-position space diverges from the user-visible flow
74/// order. Callers must fall back to the slow per-block walk in that
75/// case.
76///
77/// `position` past the document end clamps to the last block's
78/// end-of-content.
79pub fn find_block_at_char_position(store: &Store, position: i64) -> Option<(EntityId, i64, i64)> {
80    // Fast path is only valid when EVERY block in the document is
81    // mirrored to the rope. Disqualifying cases:
82    //
83    // 1. Documents containing tables — cell content sits at separate
84    //    rope byte ranges (plan §1.6) so byte→block lookup finds the
85    //    wrong block for cursor positions inside cells.
86    //
87    // 2. Documents containing sub-frames inserted with a parent —
88    //    `insert_frame_uc` currently only mirrors top-level frames to
89    //    the rope (parent=None case), leaving sub-frame blocks
90    //    unregistered in the offset index. Detect by comparing block
91    //    counts: if the rope index has fewer Block markers than the
92    //    store has Block entities, some are unmirrored.
93    let offsets = store.block_offsets.read().unwrap();
94    if offsets
95        .entries
96        .iter()
97        .any(|(m, _)| matches!(m, OffsetMarker::TableAnchor(_)))
98    {
99        return None;
100    }
101    let indexed_block_count = offsets
102        .entries
103        .iter()
104        .filter(|(m, _)| matches!(m, OffsetMarker::Block(_)))
105        .count();
106    let total_block_count = store.blocks.read().unwrap().len();
107    if indexed_block_count != total_block_count {
108        return None;
109    }
110    drop(offsets);
111
112    let rope = store.rope.read().unwrap();
113    let total_chars = rope.len_chars() as i64;
114    let pos_clamped = position.clamp(0, total_chars);
115    let abs_byte = rope.char_to_byte(pos_clamped as usize);
116    drop(rope);
117
118    let offsets = store.block_offsets.read().unwrap();
119    let block_id = offsets.marker_at_byte(abs_byte as u32)?.as_block()?;
120    let (bs, be, has_successor) = offsets.range_with_successor(OffsetMarker::Block(block_id))?;
121    let content_end = if has_successor && be > bs { be - 1 } else { be };
122
123    drop(offsets);
124
125    let rope = store.rope.read().unwrap();
126    let block_char_start = rope.byte_to_char(bs as usize) as i64;
127    // Clamp the cursor's byte to the block's content area (not into the
128    // trailing `\n` boundary if any).
129    let byte_for_char = std::cmp::min(abs_byte, content_end as usize);
130    let abs_char = rope.byte_to_char(byte_for_char) as i64;
131    let char_in_block = abs_char - block_char_start;
132
133    // NOTE: separator semantics differ across use cases. Callers like
134    // `get_block_at_position_uc` interpret "position at the end of a
135    // non-empty block with a successor" as "belongs to the next
136    // block"; callers like `insert_text_uc` want the previous block
137    // with offset == block_char_len. This helper returns the
138    // previous-block answer (the simpler semantic); use-case-specific
139    // advance logic lives at the call site.
140    let _ = has_successor;
141
142    Some((block_id, char_in_block, block_char_start))
143}
144
145/// Convert an in-block char offset to an in-block byte offset using the
146/// rope's index. Both inputs and outputs are relative to the start of
147/// the block's content (NOT absolute rope positions). The char offset
148/// is clamped to the block's logical length so callers don't need to
149/// pre-validate.
150///
151/// O(log n) via `ropey::Rope::char_to_byte` — replaces the O(L)
152/// "materialize block text + walk char_indices" pattern that
153/// `set_text_format_uc` / `merge_text_format_uc` used. Returns
154/// `(byte_offset_in_block, content_byte_len)` so callers can also
155/// pass the content length to `debug_assert_well_formed` without
156/// materializing the text.
157///
158/// Returns `(0, 0)` for blocks not registered in the offset index.
159pub fn block_char_to_byte_in_block(
160    store: &Store,
161    block_id: EntityId,
162    char_offset: usize,
163) -> (u32, usize) {
164    let offsets = store.block_offsets.read().unwrap();
165    let marker = OffsetMarker::Block(block_id);
166    let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
167        return (0, 0);
168    };
169    let content_end_bytes = if has_successor && be > bs { be - 1 } else { be };
170    let content_byte_len = (content_end_bytes - bs) as usize;
171    drop(offsets);
172
173    let rope = store.rope.read().unwrap();
174    let block_char_start = rope.byte_to_char(bs as usize);
175    let block_char_end = rope.byte_to_char(content_end_bytes as usize);
176    let block_char_len = block_char_end - block_char_start;
177
178    // Clamp char_offset to block's char length.
179    let clamped = std::cmp::min(char_offset, block_char_len);
180    let abs_byte = rope.char_to_byte(block_char_start + clamped);
181    let byte_in_block = (abs_byte - bs as usize) as u32;
182    (byte_in_block, content_byte_len)
183}
184
185/// Fast-path full-document plain text: returns `Some(rope.to_string())`
186/// iff the document is in the canonical flat layout — no table anchors
187/// in the offset index, single top-level frame. In that case the
188/// rope's byte order is the same as the document-flow order, so one
189/// `to_string()` allocation replaces the O(N) per-block walk +
190/// per-block `Cow<str>` materialization that `build_full_text` /
191/// `export_plain_text` would otherwise do.
192///
193/// Returns `None` for documents containing tables or nested frames —
194/// those require the per-frame, per-block traversal because table
195/// cell content lives in separate byte ranges later in the rope
196/// (plan §1.6), not interleaved with the parent frame's bytes.
197///
198/// `top_frame_count` is the caller-known number of top-level frames
199/// (typically obtained from `Document.frames.len()`). Callers
200/// already have this value and pass it in to avoid a redundant uow
201/// query.
202pub fn rope_flat_text_if_simple(store: &Store, top_frame_count: usize) -> Option<String> {
203    if top_frame_count != 1 {
204        return None;
205    }
206    let offsets = store.block_offsets.read().unwrap();
207    let has_table = offsets
208        .entries
209        .iter()
210        .any(|(m, _)| matches!(m, OffsetMarker::TableAnchor(_)));
211    if has_table {
212        return None;
213    }
214    drop(offsets);
215    Some(store.rope.read().unwrap().to_string())
216}
217
218/// Reset the rope to empty and clear `block_offsets`. Called by
219/// importers when they replace the entire document content.
220pub fn rope_reset(store: &Store) {
221    *store.rope.write().unwrap() = ropey::Rope::new();
222    *store.block_offsets.write().unwrap() =
223        crate::database::block_offset_index::BlockOffsetIndex::new();
224}
225
226/// Append `text` to the end of the rope and register `block_id` at
227/// the byte position where the text starts. Returns that byte offset.
228///
229/// Callers are responsible for inserting an inter-block `\n`
230/// (`rope_insert_block_boundary`) before each block AFTER the first
231/// in a contiguous frame.
232pub fn rope_append_block(store: &Store, block_id: EntityId, text: &str) -> u32 {
233    let mut rope = store.rope.write().unwrap();
234    let byte_start = rope.len_bytes() as u32;
235    let char_end = rope.len_chars();
236    rope.insert(char_end, text);
237    let new_total = rope.len_bytes() as u32;
238    drop(rope);
239
240    let mut offsets = store.block_offsets.write().unwrap();
241    offsets.push_block(block_id, byte_start);
242    offsets.set_total_bytes(new_total);
243    byte_start
244}
245
246/// Insert `text` as a new block at `byte_pos` in the rope, prepending
247/// a `\n` boundary. Used by `insert_table_uc` to place cell blocks at
248/// the end of their containing top-level frame's range (plan §1.6),
249/// rather than always at rope end.
250///
251/// Total bytes inserted: `1 + text.len()`. The block's content
252/// occupies `[byte_pos + 1, byte_pos + 1 + text.len())`. The block
253/// entry is registered at `byte_pos + 1` in `block_offsets`.
254///
255/// Existing entries with `byte_start == byte_pos` (e.g. a previous
256/// empty block whose end coincides with this insertion point) are
257/// kept BEFORE the new entry in the Vec, since the inserted `\n`
258/// boundary belongs after them. Entries strictly past `byte_pos`
259/// shift forward by `(1 + text.len())` bytes.
260///
261/// When `byte_pos == total_bytes`, behaves like
262/// `rope_insert_block_boundary` followed by `rope_append_block`.
263pub fn rope_insert_block_at(store: &Store, byte_pos: u32, block_id: EntityId, text: &str) {
264    let delta = (1 + text.len()) as i32;
265    // Vec position: insert AFTER any entry at byte_pos itself
266    // (those represent earlier empty blocks whose `\n` boundary
267    // we are placing now). Only entries strictly past byte_pos
268    // come after our new entry in the Vec.
269    let new_entry_vec_pos = {
270        let offsets = store.block_offsets.read().unwrap();
271        offsets
272            .entries
273            .iter()
274            .position(|(_, bs)| *bs > byte_pos)
275            .unwrap_or(offsets.entries.len())
276    };
277    {
278        let mut rope = store.rope.write().unwrap();
279        let char_idx = rope.byte_to_char(byte_pos as usize);
280        let mut combined = String::with_capacity(1 + text.len());
281        combined.push('\n');
282        combined.push_str(text);
283        rope.insert(char_idx, &combined);
284    }
285    let mut offsets = store.block_offsets.write().unwrap();
286    // Shift entries strictly past byte_pos. Entries AT byte_pos
287    // (the prior empty block) stay where they are — the new `\n`
288    // is conceptually "after" them.
289    offsets.shift_after(byte_pos + 1, delta);
290    offsets.insert_at(
291        new_entry_vec_pos,
292        OffsetMarker::Block(block_id),
293        byte_pos + 1,
294    );
295}
296
297/// Walks up `frame.parent_frame` to find the top-level ancestor of
298/// the given frame, then returns the end byte of that top-level
299/// frame's current rope range — i.e. the byte position where blocks
300/// belonging to that frame's subtree (e.g. table cells per plan §1.6)
301/// should be inserted so they land BEFORE any following top-level
302/// frame's content.
303///
304/// Reads `block_offsets`/`frames`/`tables`/`table_cells` directly, so
305/// the result is fresh even when `Frame.byte_range` has not yet been
306/// recomputed at commit time.
307pub fn top_level_frame_end_byte(store: &Store, frame_id: EntityId) -> u32 {
308    let top_id = {
309        let frames = store.frames.read().unwrap();
310        let mut current = frame_id;
311        loop {
312            let Some(f) = frames.get(&current) else {
313                return 0;
314            };
315            match f.parent_frame {
316                None => break current,
317                Some(p) => current = p,
318            }
319        }
320    };
321    let (_min, max) = compute_frame_byte_range_recursive(store, top_id);
322    max
323}
324
325/// Append a new empty block to the end of the rope, separating it
326/// from any prior content with a `\n` boundary (only if the rope is
327/// already non-empty). Registers `block_id` at the resulting byte
328/// position. Returns that byte position. Used when `insert_frame_uc`
329/// creates a new top-level frame with a single empty block.
330pub fn rope_append_empty_block(store: &Store, block_id: EntityId) -> u32 {
331    let was_empty = store.rope.read().unwrap().len_bytes() == 0;
332    if !was_empty {
333        rope_insert_block_boundary(store);
334    }
335    let pos = store.rope.read().unwrap().len_bytes() as u32;
336    let mut offsets = store.block_offsets.write().unwrap();
337    offsets.push_block(block_id, pos);
338    offsets.set_total_bytes(pos);
339    pos
340}
341
342/// Append a single `\n` inter-block boundary character to the end of
343/// the rope. Does NOT register a block — this is the sentinel between
344/// two adjacent blocks within the same frame (plan §1.4).
345pub fn rope_insert_block_boundary(store: &Store) {
346    let mut rope = store.rope.write().unwrap();
347    let char_end = rope.len_chars();
348    rope.insert(char_end, "\n");
349    let new_total = rope.len_bytes() as u32;
350    drop(rope);
351
352    store
353        .block_offsets
354        .write()
355        .unwrap()
356        .set_total_bytes(new_total);
357}
358
359/// Insert `text` at `byte_offset_in_block` inside the block identified
360/// by `block_id`. Looks up the block's start in the rope via
361/// `block_offsets.range_of()`, splices into the rope, and shifts
362/// subsequent block offsets by the inserted byte length.
363///
364/// Silently no-ops if the block is not registered in the offset index
365/// (this can happen for blocks whose content lives outside the global
366/// rope, e.g. table cells until step 5.5).
367pub fn rope_insert_in_block(
368    store: &Store,
369    block_id: EntityId,
370    byte_offset_in_block: u32,
371    text: &str,
372) {
373    let inserted_bytes = text.len() as u32;
374    if inserted_bytes == 0 {
375        return;
376    }
377    let block_byte_start = {
378        let offsets = store.block_offsets.read().unwrap();
379        let Some((start, _end)) = offsets.range_of_block(block_id) else {
380            return;
381        };
382        start
383    };
384    let rope_byte = block_byte_start + byte_offset_in_block;
385    {
386        let mut rope = store.rope.write().unwrap();
387        let char_idx = rope.byte_to_char(rope_byte as usize);
388        rope.insert(char_idx, text);
389    }
390    // Shift entries past this block by inserted_bytes. Threshold
391    // is one byte past block_byte_start so the current block's own
392    // entry isn't moved.
393    store
394        .block_offsets
395        .write()
396        .unwrap()
397        .shift_after(block_byte_start + 1, inserted_bytes as i32);
398}
399
400/// Split an existing block in the rope at `byte_offset_in_block`:
401/// - inserts a `\n` inter-block boundary at the absolute byte position
402///   `block_start + byte_offset_in_block` in the rope
403/// - shifts entries past that position by +1 byte
404/// - inserts a new entry for `new_block_id` at
405///   `block_start + byte_offset_in_block + 1` (right after the newline),
406///   placed immediately after the original block in the entries Vec
407///
408/// `byte_offset_in_block` may be 0 (split before first char of block,
409/// i.e. insert empty block before this one) or equal to the block's
410/// byte length (split after last char, i.e. insert empty block after).
411pub fn rope_split_block(
412    store: &Store,
413    current_block_id: EntityId,
414    byte_offset_in_block: u32,
415    new_block_id: EntityId,
416) {
417    let current_marker = OffsetMarker::Block(current_block_id);
418    let (block_start, current_idx) = {
419        let offsets = store.block_offsets.read().unwrap();
420        let Some((start, _end)) = offsets.range_of(current_marker) else {
421            return;
422        };
423        let idx = offsets
424            .entries
425            .iter()
426            .position(|(m, _)| *m == current_marker)
427            .unwrap();
428        (start, idx)
429    };
430    let split_byte = block_start + byte_offset_in_block;
431
432    // 1. Insert the `\n` boundary at the split point.
433    {
434        let mut rope = store.rope.write().unwrap();
435        let char_idx = rope.byte_to_char(split_byte as usize);
436        rope.insert(char_idx, "\n");
437    }
438
439    // 2. Shift entries past the split (and total_bytes) by +1.
440    //    Threshold > split_byte so the new entry we insert next
441    //    isn't double-shifted.
442    store
443        .block_offsets
444        .write()
445        .unwrap()
446        .shift_after(split_byte + 1, 1);
447
448    // 3. Register the new block at `split_byte + 1`, immediately
449    //    after the original in the entries Vec.
450    store.block_offsets.write().unwrap().insert_at(
451        current_idx + 1,
452        OffsetMarker::Block(new_block_id),
453        split_byte + 1,
454    );
455}
456
457/// Merge `start_block` and `end_block` by deleting the rope range
458/// `[start_block.start + byte_so .. end_block.start + byte_eo)` — i.e.
459/// the suffix of `start_block`, every block between (and their
460/// boundary newlines), and the prefix of `end_block`. Removes the
461/// index entries for every block strictly between `start_block` and
462/// `end_block` (inclusive of `end_block` itself); the surviving
463/// content lives in `start_block`. Shifts any blocks past `end_block`
464/// by the negative delta.
465///
466/// No-op if `start_block` is not in the index. Skipped for any
467/// intermediate block id whose range is missing from the index (e.g.
468/// table cells until step 5.5e).
469pub fn rope_merge_block_range(
470    store: &Store,
471    start_block_id: EntityId,
472    byte_so_in_start: u32,
473    end_block_id: EntityId,
474    byte_eo_in_end: u32,
475) {
476    let start_marker = OffsetMarker::Block(start_block_id);
477    let end_marker = OffsetMarker::Block(end_block_id);
478    let (start_block_byte, end_block_byte, start_idx, end_idx) = {
479        let offsets = store.block_offsets.read().unwrap();
480        let Some((sb, _)) = offsets.range_of(start_marker) else {
481            return;
482        };
483        let Some((eb, _)) = offsets.range_of(end_marker) else {
484            return;
485        };
486        let si = offsets
487            .entries
488            .iter()
489            .position(|(m, _)| *m == start_marker)
490            .unwrap();
491        let ei = offsets
492            .entries
493            .iter()
494            .position(|(m, _)| *m == end_marker)
495            .unwrap();
496        (sb, eb, si, ei)
497    };
498    if end_idx <= start_idx {
499        return;
500    }
501
502    let delete_start = start_block_byte + byte_so_in_start;
503    let delete_end = end_block_byte + byte_eo_in_end;
504    if delete_end <= delete_start {
505        return;
506    }
507    let deleted_bytes = delete_end - delete_start;
508
509    // 1. Remove the rope range.
510    {
511        let mut rope = store.rope.write().unwrap();
512        let char_start = rope.byte_to_char(delete_start as usize);
513        let char_end = rope.byte_to_char(delete_end as usize);
514        rope.remove(char_start..char_end);
515    }
516
517    // 2. Remove block_offsets entries for [start_idx+1 ..= end_idx].
518    {
519        let mut offsets = store.block_offsets.write().unwrap();
520        offsets.drain_inclusive(start_idx + 1, end_idx);
521    }
522
523    // 3. Shift any remaining entries past the deletion by -deleted_bytes.
524    //    Threshold > delete_start because start_block's own entry
525    //    sits at delete_start - byte_so_in_start (≤ delete_start)
526    //    and must not move.
527    store
528        .block_offsets
529        .write()
530        .unwrap()
531        .shift_after(delete_start + 1, -(deleted_bytes as i32));
532}
533
534/// Insert a U+FFFC OBJECT REPLACEMENT CHARACTER sentinel in the rope
535/// at the table-anchor position, registering a `TableAnchor(table_id)`
536/// marker in the offset index (plan §1.6).
537///
538/// `target_block_id` is the block in the parent frame that the table
539/// is adjacent to. `after` controls whether the table goes BEFORE
540/// (`after = false`) or AFTER the target block.
541///
542/// The 3-byte sentinel is paired with an inter-marker `\n`:
543/// - `after = false`: inserts `\u{FFFC}\n` at `target.byte_start`
544/// - `after = true`, target is NOT the last entry: inserts
545///   `\u{FFFC}\n` at `target.byte_end` (between target's trailing
546///   `\n` and the next entry)
547/// - `after = true`, target IS the last entry: inserts `\n\u{FFFC}`
548///   at `target.byte_end` (rope now ends with the sentinel)
549///
550/// NOTE: cell-internal content is not yet routed through the rope.
551/// Cells remain in `Block.plain_text` until a follow-up commit adds
552/// the `Frame.byte_range` model from plan §1.6. The rope reflects
553/// table *presence* (3-byte sentinel) only.
554///
555/// No-op if `target_block_id` is not in the index.
556pub fn rope_insert_table_anchor(
557    store: &Store,
558    table_id: EntityId,
559    target_block_id: EntityId,
560    after: bool,
561) {
562    const SENTINEL: &str = "\u{FFFC}"; // 3 bytes
563    const SENTINEL_BYTES: u32 = 3;
564
565    let (insert_pos, target_idx, target_is_last) = {
566        let offsets = store.block_offsets.read().unwrap();
567        let target_marker = OffsetMarker::Block(target_block_id);
568        let Some((start, end)) = offsets.range_of(target_marker) else {
569            return;
570        };
571        let idx = offsets
572            .entries
573            .iter()
574            .position(|(m, _)| *m == target_marker)
575            .unwrap();
576        let is_last = idx + 1 == offsets.entries.len();
577        let pos = if after { end } else { start };
578        (pos, idx, is_last)
579    };
580
581    // Insertion strategy:
582    let (rope_inserted, marker_byte_start, new_entry_pos, shift_threshold, shift_delta) = if !after
583    {
584        // Before target: "\u{FFFC}\n" at target.byte_start
585        ("\u{FFFC}\n", insert_pos, target_idx, insert_pos, 4i32)
586    } else if !target_is_last {
587        // After target, with following entries: "\u{FFFC}\n"
588        // at target.byte_end. The TableAnchor sits where the
589        // next block USED to start; that following entry
590        // shifts by 4.
591        ("\u{FFFC}\n", insert_pos, target_idx + 1, insert_pos, 4i32)
592    } else {
593        // After target which is last: "\n\u{FFFC}" appended.
594        // TableAnchor's byte_start sits 1 past the original
595        // total (after the new `\n`).
596        (
597            "\n\u{FFFC}",
598            insert_pos + 1,
599            target_idx + 1,
600            insert_pos,
601            4i32,
602        )
603    };
604
605    // 1. Splice the literal bytes into the rope.
606    {
607        let mut rope = store.rope.write().unwrap();
608        let char_idx = rope.byte_to_char(insert_pos as usize);
609        rope.insert(char_idx, rope_inserted);
610    }
611
612    // 2. Shift entries past the insertion point. Use shift_after
613    //    BEFORE inserting our new entry so we don't double-shift.
614    store
615        .block_offsets
616        .write()
617        .unwrap()
618        .shift_after(shift_threshold, shift_delta);
619
620    // 3. Register the TableAnchor at the resolved byte position
621    //    and the resolved Vec position.
622    store.block_offsets.write().unwrap().insert_at(
623        new_entry_pos,
624        OffsetMarker::TableAnchor(table_id),
625        marker_byte_start,
626    );
627
628    // Note: SENTINEL_BYTES is part of `shift_delta` (3 for the
629    // sentinel + 1 for the `\n`).
630    let _ = SENTINEL;
631    let _ = SENTINEL_BYTES;
632}
633
634/// Append a U+FFFC table-anchor sentinel at the end of the rope and
635/// register a `TableAnchor(table_id)` marker. If the rope is already
636/// non-empty, prepends a `\n` boundary so the sentinel doesn't run
637/// into the previous entry's content.
638///
639/// Used by import paths (`import_html_uc`, `import_markdown_uc`)
640/// that process the document linearly and append entities as they
641/// encounter them, rather than inserting relative to an existing
642/// target block.
643pub fn rope_append_table_anchor(store: &Store, table_id: EntityId) {
644    let (anchor_byte_start, new_total) = {
645        let mut rope = store.rope.write().unwrap();
646        let was_empty = rope.len_bytes() == 0;
647        let char_end = rope.len_chars();
648        let to_insert = if was_empty { "\u{FFFC}" } else { "\n\u{FFFC}" };
649        rope.insert(char_end, to_insert);
650        let new_total = rope.len_bytes() as u32;
651        // Sentinel is 3 bytes; if a `\n` was prepended that's 1 byte
652        // before the sentinel.
653        let anchor_byte_start = new_total - 3;
654        (anchor_byte_start, new_total)
655    };
656
657    let mut offsets = store.block_offsets.write().unwrap();
658    offsets.push(OffsetMarker::TableAnchor(table_id), anchor_byte_start);
659    offsets.set_total_bytes(new_total);
660}
661
662/// Remove a TableAnchor sentinel from the rope, undoing the effect
663/// of `rope_insert_table_anchor`. Looks up the anchor's byte range
664/// (always 3 bytes for the U+FFFC plus 1 byte of inter-marker `\n`
665/// either before or after, depending on what's adjacent), removes
666/// those 4 bytes from the rope, drops the entry, shifts trailing
667/// entries by -4.
668///
669/// No-op if no TableAnchor for `table_id` exists.
670pub fn rope_remove_table_anchor(store: &Store, table_id: EntityId) {
671    let anchor_marker = OffsetMarker::TableAnchor(table_id);
672    let (anchor_byte_start, anchor_idx, anchor_is_last, has_predecessor) = {
673        let offsets = store.block_offsets.read().unwrap();
674        let Some((start, _end)) = offsets.range_of(anchor_marker) else {
675            return;
676        };
677        let idx = offsets
678            .entries
679            .iter()
680            .position(|(m, _)| *m == anchor_marker)
681            .unwrap();
682        let is_last = idx + 1 == offsets.entries.len();
683        let has_pred = idx > 0;
684        (start, idx, is_last, has_pred)
685    };
686
687    // Symmetric to insert_table_anchor. The 4 bytes to remove are:
688    // - if anchor is last: [byte_start - 1 .. byte_start + 3) — the
689    //   preceding `\n` + the 3-byte sentinel
690    // - otherwise: [byte_start .. byte_start + 4) — the sentinel
691    //   + the following `\n`
692    let (remove_start, remove_end) = if anchor_is_last && has_predecessor {
693        (anchor_byte_start - 1, anchor_byte_start + 3)
694    } else {
695        (anchor_byte_start, anchor_byte_start + 4)
696    };
697
698    {
699        let mut rope = store.rope.write().unwrap();
700        let char_start = rope.byte_to_char(remove_start as usize);
701        let char_end = rope.byte_to_char(remove_end as usize);
702        rope.remove(char_start..char_end);
703    }
704    {
705        let mut offsets = store.block_offsets.write().unwrap();
706        offsets.remove_at(anchor_idx);
707    }
708    store
709        .block_offsets
710        .write()
711        .unwrap()
712        .shift_after(remove_start, -4);
713}
714
715/// Remove a registered block from the rope: drops its content bytes
716/// plus one boundary `\n` (the one after, if the block has a
717/// successor; the one before, if it's the last entry), removes the
718/// entry from the index, and shifts trailing entries by the negative
719/// byte delta.
720///
721/// No-op if `block_id` is not in the index. No-op for the special
722/// case of a single-block document being asked to remove its sole
723/// block (we'd produce an empty rope but the block itself is being
724/// cascaded by the caller).
725pub fn rope_remove_block(store: &Store, block_id: EntityId) {
726    let block_marker = OffsetMarker::Block(block_id);
727    let (block_start, block_end, idx, is_last, has_pred) = {
728        let offsets = store.block_offsets.read().unwrap();
729        let Some((start, end)) = offsets.range_of(block_marker) else {
730            return;
731        };
732        let idx = offsets
733            .entries
734            .iter()
735            .position(|(m, _)| *m == block_marker)
736            .unwrap();
737        let is_last = idx + 1 == offsets.entries.len();
738        let has_pred = idx > 0;
739        (start, end, idx, is_last, has_pred)
740    };
741
742    // Determine the byte range to delete:
743    // - if there's a successor: [block_start..block_end) — the
744    //   block's content INCLUDING its trailing boundary `\n`
745    //   (which is the byte at block_end - 1)
746    // - if last and has predecessor: [block_start - 1..block_end)
747    //   — also delete the LEADING boundary `\n` that the previous
748    //   entry placed before us
749    // - if last and no predecessor (sole entry): just delete
750    //   [block_start..block_end) (no boundary `\n` exists)
751    let (remove_start, remove_end) = if is_last && has_pred {
752        (block_start.saturating_sub(1), block_end)
753    } else {
754        (block_start, block_end)
755    };
756    if remove_end <= remove_start {
757        // Drop the entry only; nothing to remove from the rope.
758        store.block_offsets.write().unwrap().remove_at(idx);
759        return;
760    }
761    let deleted_bytes = remove_end - remove_start;
762
763    {
764        let mut rope = store.rope.write().unwrap();
765        let char_start = rope.byte_to_char(remove_start as usize);
766        let char_end = rope.byte_to_char(remove_end as usize);
767        rope.remove(char_start..char_end);
768    }
769    store.block_offsets.write().unwrap().remove_at(idx);
770    // Shift entries STRICTLY PAST the removed range. Using `remove_end` as
771    // the threshold (rather than `remove_start`) keeps an empty predecessor
772    // whose byte_start equals `remove_start` (the leading boundary `\n` we
773    // just deleted) in place — its content position is unchanged, only its
774    // trailing boundary is gone. `total_bytes` decreases by `deleted_bytes`
775    // regardless of threshold.
776    store
777        .block_offsets
778        .write()
779        .unwrap()
780        .shift_after(remove_end, -(deleted_bytes as i32));
781}
782
783/// Replace the entire content of a registered block in the rope with
784/// `new_text`. Preserves the block's `byte_start` and its trailing
785/// boundary `\n` (if any); subsequent entries shift by the net
786/// length delta.
787///
788/// Used by use cases that compute a block's final content as a string
789/// and want to push that content to the rope in one shot — e.g. the
790/// block-splitting branches of `insert_html_at_position_uc` and
791/// `insert_markdown_at_position_uc`, where each affected block
792/// (head, tail, mid-replacement) gets a single new value.
793///
794/// No-op if `block_id` is not in the index.
795pub fn rope_replace_block_content(store: &Store, block_id: EntityId, new_text: &str) {
796    let (block_byte_start, content_bytes) = {
797        let offsets = store.block_offsets.read().unwrap();
798        let Some((start, end)) = offsets.range_of_block(block_id) else {
799            return;
800        };
801        let total = offsets.total_bytes();
802        // `range_of` extends to the next entry's `byte_start` (or to
803        // `total_bytes`). If there's a following entry, the byte at
804        // `end - 1` is the inter-block boundary `\n` that belongs to
805        // the boundary between this block and the next, not to this
806        // block's content.
807        let has_trailing_boundary = end < total;
808        let content_bytes = if has_trailing_boundary {
809            end - start - 1
810        } else {
811            end - start
812        };
813        (start, content_bytes)
814    };
815
816    let new_bytes = new_text.len() as u32;
817    if content_bytes == 0 && new_bytes == 0 {
818        return;
819    }
820    let delta = new_bytes as i32 - content_bytes as i32;
821
822    // Splice [block_byte_start..block_byte_start + content_bytes)
823    // with `new_text`.
824    {
825        let mut rope = store.rope.write().unwrap();
826        let char_start = rope.byte_to_char(block_byte_start as usize);
827        if content_bytes > 0 {
828            let char_end = rope.byte_to_char((block_byte_start + content_bytes) as usize);
829            rope.remove(char_start..char_end);
830        }
831        if new_bytes > 0 {
832            rope.insert(char_start, new_text);
833        }
834    }
835
836    if delta != 0 {
837        // Shift entries that sit strictly past this block's start
838        // (i.e. the trailing boundary and everything after).
839        store
840            .block_offsets
841            .write()
842            .unwrap()
843            .shift_after(block_byte_start + 1, delta);
844    }
845}
846
847/// Delete bytes `[byte_start_in_block..byte_end_in_block)` from inside
848/// the block identified by `block_id`. Shifts subsequent block offsets
849/// by the deleted byte length. No-op for blocks not in the index.
850pub fn rope_delete_in_block(
851    store: &Store,
852    block_id: EntityId,
853    byte_start_in_block: u32,
854    byte_end_in_block: u32,
855) {
856    if byte_end_in_block <= byte_start_in_block {
857        return;
858    }
859    let deleted_bytes = byte_end_in_block - byte_start_in_block;
860    let block_byte_start = {
861        let offsets = store.block_offsets.read().unwrap();
862        let Some((start, _end)) = offsets.range_of_block(block_id) else {
863            return;
864        };
865        start
866    };
867    let rope_byte_start = block_byte_start + byte_start_in_block;
868    let rope_byte_end = block_byte_start + byte_end_in_block;
869    {
870        let mut rope = store.rope.write().unwrap();
871        let char_start = rope.byte_to_char(rope_byte_start as usize);
872        let char_end = rope.byte_to_char(rope_byte_end as usize);
873        rope.remove(char_start..char_end);
874    }
875    store
876        .block_offsets
877        .write()
878        .unwrap()
879        .shift_after(block_byte_start + 1, -(deleted_bytes as i32));
880}
881
882/// Recompute `Frame.byte_range` for every frame in `store.frames`
883/// based on current `block_offsets` and the frame tree structure.
884/// Plan §1.6 invariant: each frame's byte_range is the (min_start,
885/// max_end) over all its descendant blocks, sub-frames, and table
886/// anchors+cells.
887///
888/// Call this after any mutation that affects rope byte positions.
889/// O(F + B) where F = frames, B = blocks in the document.
890pub fn recompute_all_frame_byte_ranges(store: &Store) {
891    let frame_ids: Vec<EntityId> = {
892        let frames = store.frames.read().unwrap();
893        frames.keys().copied().collect()
894    };
895    for fid in frame_ids {
896        let new_range = compute_frame_byte_range_recursive(store, fid);
897        let mut frames = store.frames.write().unwrap();
898        if let Some(f) = frames.get(&fid).cloned()
899            && f.byte_range != new_range
900        {
901            let mut updated = f;
902            updated.byte_range = new_range;
903            frames.insert(fid, updated);
904        }
905    }
906}
907
908fn compute_frame_byte_range_recursive(store: &Store, frame_id: EntityId) -> (u32, u32) {
909    let mut bounds: Option<(u32, u32)> = None;
910    walk_frame_bounds(store, frame_id, &mut bounds);
911    bounds.unwrap_or((0, 0))
912}
913
914fn walk_frame_bounds(store: &Store, frame_id: EntityId, bounds: &mut Option<(u32, u32)>) {
915    fn merge(bounds: &mut Option<(u32, u32)>, s: u32, e: u32) {
916        *bounds = Some(match *bounds {
917            None => (s, e),
918            Some((min, max)) => (min.min(s), max.max(e)),
919        });
920    }
921
922    let (blocks, child_order, table_id) = {
923        let frames = store.frames.read().unwrap();
924        let Some(f) = frames.get(&frame_id) else {
925            return;
926        };
927        (f.blocks.clone(), f.child_order.clone(), f.table)
928    };
929
930    {
931        let offsets = store.block_offsets.read().unwrap();
932        for bid in &blocks {
933            if let Some((s, e)) = offsets.range_of_block(*bid) {
934                merge(bounds, s, e);
935            }
936        }
937        if let Some(tid) = table_id
938            && let Some((s, e)) = offsets.range_of(OffsetMarker::TableAnchor(tid))
939        {
940            merge(bounds, s, e);
941        }
942    }
943
944    for entry in &child_order {
945        if *entry < 0 {
946            walk_frame_bounds(store, (-*entry) as EntityId, bounds);
947        }
948    }
949
950    if let Some(tid) = table_id {
951        let cell_ids: Vec<EntityId> = {
952            let tables = store.tables.read().unwrap();
953            tables
954                .get(&tid)
955                .map(|t| t.cells.clone())
956                .unwrap_or_default()
957        };
958        for cell_id in &cell_ids {
959            let cell_frame_id = {
960                let cells = store.table_cells.read().unwrap();
961                cells.get(cell_id).and_then(|c| c.cell_frame)
962            };
963            if let Some(cfid) = cell_frame_id {
964                walk_frame_bounds(store, cfid, bounds);
965            }
966        }
967    }
968}