Skip to main content

common/database/
rope_helpers.rs

1//! Helpers for writing the global character rope from use cases.
2//!
3//! Each helper mutates `store.rope` and `store.block_offsets`
4//! together so callers can stay oblivious to the underlying layout.
5//! Read helpers (`block_content_via_store`) source content from the
6//! rope and return empty when a block is not yet registered in the
7//! offset index.
8
9use crate::database::Store;
10use crate::database::block_offset_index::OffsetMarker;
11use crate::entities::Block;
12use crate::types::EntityId;
13
14/// Read a block's content from the global rope via `block_offsets`,
15/// stripping the trailing `\n` boundary that `range_of` includes for
16/// non-last entries. Returns an empty string if the block isn't
17/// registered in the offset index (e.g. a freshly-created block that
18/// hasn't been spliced into the rope yet — `setup_with_text` test
19/// docs use this path).
20pub fn block_content_via_store(block: &Block, store: &Store) -> String {
21    let offsets = store.block_offsets.read().unwrap();
22    let marker = OffsetMarker::Block(block.id);
23    let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
24        return String::new();
25    };
26    // Drop the trailing inter-block boundary `\n` ONLY when this block
27    // has a successor entry — that one byte is the boundary `\n` between
28    // this block and the next. The last entry has no trailing boundary;
29    // any final `\n` is real content.
30    let content_end = if has_successor && be > bs { be - 1 } else { be };
31    drop(offsets);
32    let rope = store.rope.read().unwrap();
33    rope.byte_slice(bs as usize..content_end as usize)
34        .to_string()
35}
36
37/// Logical character count of a block — what the old
38/// `Block.text_length` field used to cache. Image anchors are stored as
39/// `\u{FFFC}` (one char, three bytes) inside the rope content, so the
40/// char count already covers them. Returns 0 for blocks not registered
41/// in the offset index.
42///
43/// O(log n) via `ropey::Rope::byte_to_char` — does NOT materialize the
44/// block's text into a String. Replaces the prior O(L) implementation
45/// that counted chars by walking UTF-8 over a cloned slice.
46pub fn block_char_length(block: &Block, store: &Store) -> i64 {
47    let offsets = store.block_offsets.read().unwrap();
48    let marker = OffsetMarker::Block(block.id);
49    let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
50        return 0;
51    };
52    let content_end_bytes = if has_successor && be > bs { be - 1 } else { be };
53    drop(offsets);
54    let rope = store.rope.read().unwrap();
55    let char_start = rope.byte_to_char(bs as usize);
56    let char_end = rope.byte_to_char(content_end_bytes as usize);
57    (char_end - char_start) as i64
58}
59
60/// Return the absolute character position of a block's start in the
61/// document, derived from the rope via `BlockOffsetIndex`.
62///
63/// O(log n): one `range_of_block` lookup + one `byte_to_char` conversion.
64/// Falls back to `block.document_position` for blocks not registered in
65/// the index — currently table-cell blocks (plan §1.6 deferred) and
66/// blocks under non-top-level frames (`insert_frame_uc` only mirrors
67/// top-level frames). The stored field is set authoritatively by those
68/// non-flow paths and stays correct for them across main-flow edits.
69pub fn block_document_position(block: &Block, store: &Store) -> i64 {
70    let offsets = store.block_offsets.read().unwrap();
71    let Some((byte_start, _)) = offsets.range_of_block(block.id) else {
72        return block.document_position;
73    };
74    drop(offsets);
75    let rope = store.rope.read().unwrap();
76    rope.byte_to_char(byte_start as usize) as i64
77}
78
79/// Whether the rope's char-position space matches the user-visible
80/// flow positions that `Block.document_position` is computed against.
81///
82/// They match when every block is mirrored to the rope as a contiguous
83/// run. They DON'T match when:
84/// 1. The document contains tables — cell content sits at separate rope
85///    byte ranges (plan §1.6), so the rope is missing the cells'
86///    flow-position contribution.
87/// 2. The document contains sub-frames whose blocks aren't mirrored —
88///    `insert_frame_uc` only mirrors top-level frames.
89///
90/// When this returns `true`, readers can derive `document_position`
91/// directly from the rope (O(log n)) and the use-case-side
92/// position-refresh loops can be skipped. When it returns `false`,
93/// readers must consult the maintained `Block.document_position`
94/// stored field, and the loops are required to keep it correct.
95pub fn rope_positions_match_flow(store: &Store) -> bool {
96    let offsets = store.block_offsets.read().unwrap();
97    // The rope is authoritative as long as EVERY block is mirrored into it.
98    // Tables no longer disqualify it: cell content is mirrored inline, in
99    // document order, and the table itself occupies a 1-char anchor sentinel
100    // — so the rope's char space matches the user-visible flow order. (The
101    // flow snapshot derives its block positions from this same rope space,
102    // so the two agree by construction.) Only count Block markers; the
103    // TableAnchor sentinel entries are not blocks.
104    let indexed_block_count = offsets.entries.iter().filter(|(m, _)| m.is_block()).count();
105    drop(offsets);
106    let total_block_count = store.blocks.read().unwrap().len();
107    indexed_block_count == total_block_count
108}
109
110/// Locate which block contains a given absolute char position in the
111/// document, returning `(block_id, char_offset_in_block, block_char_start)`
112/// in O(log n) using the rope + `BlockOffsetIndex` instead of an O(N)
113/// linear walk of all blocks.
114///
115/// Replaces the per-keystroke hot path in editing use cases
116/// (`find_block_at_position_sequential`) which fetched every block
117/// + called `block_char_length` per block. For an N-block document
118///   each editor keystroke now costs O(log n) lookups instead of O(N).
119///
120/// Returns `None` for documents containing tables — table cell content
121/// lives in separate byte ranges later in the rope (plan §1.6), so the
122/// rope's char-position space diverges from the user-visible flow
123/// order. Callers must fall back to the slow per-block walk in that
124/// case.
125///
126/// `position` past the document end clamps to the last block's
127/// end-of-content.
128pub fn find_block_at_char_position(store: &Store, position: i64) -> Option<(EntityId, i64, i64)> {
129    // Fast path is only valid when EVERY block in the document is
130    // mirrored to the rope. Disqualifying cases:
131    //
132    // 1. Documents containing tables — cell content sits at separate
133    //    rope byte ranges (plan §1.6) so byte→block lookup finds the
134    //    wrong block for cursor positions inside cells.
135    //
136    // 2. Documents containing sub-frames inserted with a parent —
137    //    `insert_frame_uc` currently only mirrors top-level frames to
138    //    the rope (parent=None case), leaving sub-frame blocks
139    //    unregistered in the offset index. Detect by comparing block
140    //    counts: if the rope index has fewer Block markers than the
141    //    store has Block entities, some are unmirrored.
142    let offsets = store.block_offsets.read().unwrap();
143    // Valid as long as every block is mirrored to the rope. Tables are fine:
144    // cell content is mirrored inline in document order and the table is a
145    // 1-char anchor sentinel, so byte→block lookup resolves correctly for any
146    // non-sentinel position. A position that lands exactly on the sentinel
147    // resolves to a TableAnchor marker, where `as_block()` returns None and
148    // the caller falls back to the slow walk. Only count Block markers.
149    let indexed_block_count = offsets.entries.iter().filter(|(m, _)| m.is_block()).count();
150    let total_block_count = store.blocks.read().unwrap().len();
151    if indexed_block_count != total_block_count {
152        return None;
153    }
154    drop(offsets);
155
156    let rope = store.rope.read().unwrap();
157    let total_chars = rope.len_chars() as i64;
158    let pos_clamped = position.clamp(0, total_chars);
159    let abs_byte = rope.char_to_byte(pos_clamped as usize);
160    drop(rope);
161
162    let offsets = store.block_offsets.read().unwrap();
163    let block_id = offsets.marker_at_byte(abs_byte as u32)?.as_block()?;
164    let (bs, be, has_successor) = offsets.range_with_successor(OffsetMarker::Block(block_id))?;
165    let content_end = if has_successor && be > bs { be - 1 } else { be };
166
167    drop(offsets);
168
169    let rope = store.rope.read().unwrap();
170    let block_char_start = rope.byte_to_char(bs as usize) as i64;
171    // Clamp the cursor's byte to the block's content area (not into the
172    // trailing `\n` boundary if any).
173    let byte_for_char = std::cmp::min(abs_byte, content_end as usize);
174    let abs_char = rope.byte_to_char(byte_for_char) as i64;
175    let char_in_block = abs_char - block_char_start;
176
177    // NOTE: separator semantics differ across use cases. Callers like
178    // `get_block_at_position_uc` interpret "position at the end of a
179    // non-empty block with a successor" as "belongs to the next
180    // block"; callers like `insert_text_uc` want the previous block
181    // with offset == block_char_len. This helper returns the
182    // previous-block answer (the simpler semantic); use-case-specific
183    // advance logic lives at the call site.
184    let _ = has_successor;
185
186    Some((block_id, char_in_block, block_char_start))
187}
188
189/// Convert an in-block char offset to an in-block byte offset using the
190/// rope's index. Both inputs and outputs are relative to the start of
191/// the block's content (NOT absolute rope positions). The char offset
192/// is clamped to the block's logical length so callers don't need to
193/// pre-validate.
194///
195/// O(log n) via `ropey::Rope::char_to_byte` — replaces the O(L)
196/// "materialize block text + walk char_indices" pattern that
197/// `set_text_format_uc` / `merge_text_format_uc` used. Returns
198/// `(byte_offset_in_block, content_byte_len)` so callers can also
199/// pass the content length to `debug_assert_well_formed` without
200/// materializing the text.
201///
202/// Returns `(0, 0)` for blocks not registered in the offset index.
203pub fn block_char_to_byte_in_block(
204    store: &Store,
205    block_id: EntityId,
206    char_offset: usize,
207) -> (u32, usize) {
208    let offsets = store.block_offsets.read().unwrap();
209    let marker = OffsetMarker::Block(block_id);
210    let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
211        return (0, 0);
212    };
213    let content_end_bytes = if has_successor && be > bs { be - 1 } else { be };
214    let content_byte_len = (content_end_bytes - bs) as usize;
215    drop(offsets);
216
217    let rope = store.rope.read().unwrap();
218    let block_char_start = rope.byte_to_char(bs as usize);
219    let block_char_end = rope.byte_to_char(content_end_bytes as usize);
220    let block_char_len = block_char_end - block_char_start;
221
222    // Clamp char_offset to block's char length.
223    let clamped = std::cmp::min(char_offset, block_char_len);
224    let abs_byte = rope.char_to_byte(block_char_start + clamped);
225    let byte_in_block = (abs_byte - bs as usize) as u32;
226    (byte_in_block, content_byte_len)
227}
228
229/// Fast-path full-document plain text: returns `Some(rope.to_string())`
230/// iff the document is in the canonical flat layout — no table anchors
231/// in the offset index, single top-level frame. In that case the
232/// rope's byte order is the same as the document-flow order, so one
233/// `to_string()` allocation replaces the O(N) per-block walk +
234/// per-block `Cow<str>` materialization that `build_full_text` /
235/// `export_plain_text` would otherwise do.
236///
237/// Returns `None` for documents containing tables or nested frames —
238/// those require the per-frame, per-block traversal because table
239/// cell content lives in separate byte ranges later in the rope
240/// (plan §1.6), not interleaved with the parent frame's bytes.
241///
242/// `top_frame_count` is the caller-known number of top-level frames
243/// (typically obtained from `Document.frames.len()`). Callers
244/// already have this value and pass it in to avoid a redundant uow
245/// query.
246pub fn rope_flat_text_if_simple(store: &Store, top_frame_count: usize) -> Option<String> {
247    if top_frame_count != 1 {
248        return None;
249    }
250    let offsets = store.block_offsets.read().unwrap();
251    let has_table = offsets
252        .entries
253        .iter()
254        .any(|(m, _)| matches!(m, OffsetMarker::TableAnchor(_)));
255    if has_table {
256        return None;
257    }
258    drop(offsets);
259    Some(store.rope.read().unwrap().to_string())
260}
261
262/// Reset the rope to empty and clear `block_offsets`. Called by
263/// importers when they replace the entire document content.
264pub fn rope_reset(store: &Store) {
265    *store.rope.write().unwrap() = ropey::Rope::new();
266    *store.block_offsets.write().unwrap() =
267        crate::database::block_offset_index::BlockOffsetIndex::new();
268}
269
270/// Append `text` to the end of the rope and register `block_id` at
271/// the byte position where the text starts. Returns that byte offset.
272///
273/// Callers are responsible for inserting an inter-block `\n`
274/// (`rope_insert_block_boundary`) before each block AFTER the first
275/// in a contiguous frame.
276pub fn rope_append_block(store: &Store, block_id: EntityId, text: &str) -> u32 {
277    let mut rope = store.rope.write().unwrap();
278    let byte_start = rope.len_bytes() as u32;
279    let char_end = rope.len_chars();
280    rope.insert(char_end, text);
281    let new_total = rope.len_bytes() as u32;
282    drop(rope);
283
284    let mut offsets = store.block_offsets.write().unwrap();
285    offsets.push_block(block_id, byte_start);
286    offsets.set_total_bytes(new_total);
287    byte_start
288}
289
290/// Insert `text` as a new block at `byte_pos` in the rope, prepending
291/// a `\n` boundary. Used by `insert_table_uc` to place cell blocks at
292/// the end of their containing top-level frame's range (plan §1.6),
293/// rather than always at rope end.
294///
295/// Total bytes inserted: `1 + text.len()`. The block's content
296/// occupies `[byte_pos + 1, byte_pos + 1 + text.len())`. The block
297/// entry is registered at `byte_pos + 1` in `block_offsets`.
298///
299/// Existing entries with `byte_start == byte_pos` (e.g. a previous
300/// empty block whose end coincides with this insertion point) are
301/// kept BEFORE the new entry in the Vec, since the inserted `\n`
302/// boundary belongs after them. Entries strictly past `byte_pos`
303/// shift forward by `(1 + text.len())` bytes.
304///
305/// When `byte_pos == total_bytes`, behaves like
306/// `rope_insert_block_boundary` followed by `rope_append_block`.
307pub fn rope_insert_block_at(store: &Store, byte_pos: u32, block_id: EntityId, text: &str) {
308    let delta = (1 + text.len()) as i32;
309    // Vec position: insert AFTER any entry at byte_pos itself
310    // (those represent earlier empty blocks whose `\n` boundary
311    // we are placing now). Only entries strictly past byte_pos
312    // come after our new entry in the Vec.
313    let new_entry_vec_pos = {
314        let offsets = store.block_offsets.read().unwrap();
315        offsets
316            .entries
317            .iter()
318            .position(|(_, bs)| *bs > byte_pos)
319            .unwrap_or(offsets.entries.len())
320    };
321    {
322        let mut rope = store.rope.write().unwrap();
323        let char_idx = rope.byte_to_char(byte_pos as usize);
324        let mut combined = String::with_capacity(1 + text.len());
325        combined.push('\n');
326        combined.push_str(text);
327        rope.insert(char_idx, &combined);
328    }
329    let mut offsets = store.block_offsets.write().unwrap();
330    // Shift entries strictly past byte_pos. Entries AT byte_pos
331    // (the prior empty block) stay where they are — the new `\n`
332    // is conceptually "after" them.
333    offsets.shift_after(byte_pos + 1, delta);
334    offsets.insert_at(
335        new_entry_vec_pos,
336        OffsetMarker::Block(block_id),
337        byte_pos + 1,
338    );
339}
340
341/// Walks up `frame.parent_frame` to find the top-level ancestor of
342/// the given frame, then returns the end byte of that top-level
343/// frame's current rope range — i.e. the byte position where blocks
344/// belonging to that frame's subtree (e.g. table cells per plan §1.6)
345/// should be inserted so they land BEFORE any following top-level
346/// frame's content.
347///
348/// Reads `block_offsets`/`frames`/`tables`/`table_cells` directly, so
349/// the result is fresh even when `Frame.byte_range` has not yet been
350/// recomputed at commit time.
351pub fn top_level_frame_end_byte(store: &Store, frame_id: EntityId) -> u32 {
352    let top_id = {
353        let frames = store.frames.read().unwrap();
354        let mut current = frame_id;
355        loop {
356            let Some(f) = frames.get(&current) else {
357                return 0;
358            };
359            match f.parent_frame {
360                None => break current,
361                Some(p) => current = p,
362            }
363        }
364    };
365    let (_min, max) = compute_frame_byte_range_recursive(store, top_id);
366    max
367}
368
369/// Append a new empty block to the end of the rope, separating it
370/// from any prior content with a `\n` boundary (only if the rope is
371/// already non-empty). Registers `block_id` at the resulting byte
372/// position. Returns that byte position. Used when `insert_frame_uc`
373/// creates a new top-level frame with a single empty block.
374pub fn rope_append_empty_block(store: &Store, block_id: EntityId) -> u32 {
375    let was_empty = store.rope.read().unwrap().len_bytes() == 0;
376    if !was_empty {
377        rope_insert_block_boundary(store);
378    }
379    let pos = store.rope.read().unwrap().len_bytes() as u32;
380    let mut offsets = store.block_offsets.write().unwrap();
381    offsets.push_block(block_id, pos);
382    offsets.set_total_bytes(pos);
383    pos
384}
385
386/// Append a single `\n` inter-block boundary character to the end of
387/// the rope. Does NOT register a block — this is the sentinel between
388/// two adjacent blocks within the same frame (plan §1.4).
389pub fn rope_insert_block_boundary(store: &Store) {
390    let mut rope = store.rope.write().unwrap();
391    let char_end = rope.len_chars();
392    rope.insert(char_end, "\n");
393    let new_total = rope.len_bytes() as u32;
394    drop(rope);
395
396    store
397        .block_offsets
398        .write()
399        .unwrap()
400        .set_total_bytes(new_total);
401}
402
403/// Insert `text` at `byte_offset_in_block` inside the block identified
404/// by `block_id`. Looks up the block's start in the rope via
405/// `block_offsets.range_of()`, splices into the rope, and shifts
406/// subsequent block offsets by the inserted byte length.
407///
408/// Silently no-ops if the block is not registered in the offset index
409/// (this can happen for blocks whose content lives outside the global
410/// rope, e.g. table cells until step 5.5).
411pub fn rope_insert_in_block(
412    store: &Store,
413    block_id: EntityId,
414    byte_offset_in_block: u32,
415    text: &str,
416) {
417    let inserted_bytes = text.len() as u32;
418    if inserted_bytes == 0 {
419        return;
420    }
421    let block_byte_start = {
422        let offsets = store.block_offsets.read().unwrap();
423        let Some((start, _end)) = offsets.range_of_block(block_id) else {
424            return;
425        };
426        start
427    };
428    let rope_byte = block_byte_start + byte_offset_in_block;
429    {
430        let mut rope = store.rope.write().unwrap();
431        let char_idx = rope.byte_to_char(rope_byte as usize);
432        rope.insert(char_idx, text);
433    }
434    // Shift entries past this block by inserted_bytes. Threshold
435    // is one byte past block_byte_start so the current block's own
436    // entry isn't moved.
437    store
438        .block_offsets
439        .write()
440        .unwrap()
441        .shift_after(block_byte_start + 1, inserted_bytes as i32);
442}
443
444/// Split an existing block in the rope at `byte_offset_in_block`:
445/// - inserts a `\n` inter-block boundary at the absolute byte position
446///   `block_start + byte_offset_in_block` in the rope
447/// - shifts entries past that position by +1 byte
448/// - inserts a new entry for `new_block_id` at
449///   `block_start + byte_offset_in_block + 1` (right after the newline),
450///   placed immediately after the original block in the entries Vec
451///
452/// `byte_offset_in_block` may be 0 (split before first char of block,
453/// i.e. insert empty block before this one) or equal to the block's
454/// byte length (split after last char, i.e. insert empty block after).
455pub fn rope_split_block(
456    store: &Store,
457    current_block_id: EntityId,
458    byte_offset_in_block: u32,
459    new_block_id: EntityId,
460) {
461    let current_marker = OffsetMarker::Block(current_block_id);
462    let (block_start, current_idx) = {
463        let offsets = store.block_offsets.read().unwrap();
464        let Some((start, _end)) = offsets.range_of(current_marker) else {
465            return;
466        };
467        let idx = offsets
468            .entries
469            .iter()
470            .position(|(m, _)| *m == current_marker)
471            .unwrap();
472        (start, idx)
473    };
474    let split_byte = block_start + byte_offset_in_block;
475
476    // 1. Insert the `\n` boundary at the split point.
477    {
478        let mut rope = store.rope.write().unwrap();
479        let char_idx = rope.byte_to_char(split_byte as usize);
480        rope.insert(char_idx, "\n");
481    }
482
483    // 2. Shift entries past the split (and total_bytes) by +1.
484    //    Threshold > split_byte so the new entry we insert next
485    //    isn't double-shifted.
486    store
487        .block_offsets
488        .write()
489        .unwrap()
490        .shift_after(split_byte + 1, 1);
491
492    // 3. Register the new block at `split_byte + 1`, immediately
493    //    after the original in the entries Vec.
494    store.block_offsets.write().unwrap().insert_at(
495        current_idx + 1,
496        OffsetMarker::Block(new_block_id),
497        split_byte + 1,
498    );
499}
500
501/// Merge `start_block` and `end_block` by deleting the rope range
502/// `[start_block.start + byte_so .. end_block.start + byte_eo)` — i.e.
503/// the suffix of `start_block`, every block between (and their
504/// boundary newlines), and the prefix of `end_block`. Removes the
505/// index entries for every block strictly between `start_block` and
506/// `end_block` (inclusive of `end_block` itself); the surviving
507/// content lives in `start_block`. Shifts any blocks past `end_block`
508/// by the negative delta.
509///
510/// No-op if `start_block` is not in the index. Skipped for any
511/// intermediate block id whose range is missing from the index (e.g.
512/// table cells until step 5.5e).
513pub fn rope_merge_block_range(
514    store: &Store,
515    start_block_id: EntityId,
516    byte_so_in_start: u32,
517    end_block_id: EntityId,
518    byte_eo_in_end: u32,
519) {
520    let start_marker = OffsetMarker::Block(start_block_id);
521    let end_marker = OffsetMarker::Block(end_block_id);
522    let (start_block_byte, end_block_byte, start_idx, end_idx) = {
523        let offsets = store.block_offsets.read().unwrap();
524        let Some((sb, _)) = offsets.range_of(start_marker) else {
525            return;
526        };
527        let Some((eb, _)) = offsets.range_of(end_marker) else {
528            return;
529        };
530        let si = offsets
531            .entries
532            .iter()
533            .position(|(m, _)| *m == start_marker)
534            .unwrap();
535        let ei = offsets
536            .entries
537            .iter()
538            .position(|(m, _)| *m == end_marker)
539            .unwrap();
540        (sb, eb, si, ei)
541    };
542    if end_idx <= start_idx {
543        return;
544    }
545
546    let delete_start = start_block_byte + byte_so_in_start;
547    let delete_end = end_block_byte + byte_eo_in_end;
548    if delete_end <= delete_start {
549        return;
550    }
551    let deleted_bytes = delete_end - delete_start;
552
553    // 1. Remove the rope range.
554    {
555        let mut rope = store.rope.write().unwrap();
556        let char_start = rope.byte_to_char(delete_start as usize);
557        let char_end = rope.byte_to_char(delete_end as usize);
558        rope.remove(char_start..char_end);
559    }
560
561    // 2. Remove block_offsets entries for [start_idx+1 ..= end_idx].
562    {
563        let mut offsets = store.block_offsets.write().unwrap();
564        offsets.drain_inclusive(start_idx + 1, end_idx);
565    }
566
567    // 3. Shift any remaining entries past the deletion by -deleted_bytes.
568    //    Threshold > delete_start because start_block's own entry
569    //    sits at delete_start - byte_so_in_start (≤ delete_start)
570    //    and must not move.
571    store
572        .block_offsets
573        .write()
574        .unwrap()
575        .shift_after(delete_start + 1, -(deleted_bytes as i32));
576}
577
578/// Insert a U+FFFC OBJECT REPLACEMENT CHARACTER sentinel in the rope
579/// at the table-anchor position, registering a `TableAnchor(table_id)`
580/// marker in the offset index (plan §1.6).
581///
582/// `target_block_id` is the block in the parent frame that the table
583/// is adjacent to. `after` controls whether the table goes BEFORE
584/// (`after = false`) or AFTER the target block.
585///
586/// The 3-byte sentinel is paired with an inter-marker `\n`:
587/// - `after = false`: inserts `\u{FFFC}\n` at `target.byte_start`
588/// - `after = true`, target is NOT the last entry: inserts
589///   `\u{FFFC}\n` at `target.byte_end` (between target's trailing
590///   `\n` and the next entry)
591/// - `after = true`, target IS the last entry: inserts `\n\u{FFFC}`
592///   at `target.byte_end` (rope now ends with the sentinel)
593///
594/// NOTE: cell-internal content is not yet routed through the rope —
595/// the rope reflects table *presence* (3-byte sentinel) only.
596/// Routing cell content is deferred to plan §1.6's `Frame.byte_range`
597/// model.
598///
599/// No-op if `target_block_id` is not in the index.
600pub fn rope_insert_table_anchor(
601    store: &Store,
602    table_id: EntityId,
603    target_block_id: EntityId,
604    after: bool,
605) {
606    const SENTINEL: &str = "\u{FFFC}"; // 3 bytes
607    const SENTINEL_BYTES: u32 = 3;
608
609    let (insert_pos, target_idx, target_is_last) = {
610        let offsets = store.block_offsets.read().unwrap();
611        let target_marker = OffsetMarker::Block(target_block_id);
612        let Some((start, end)) = offsets.range_of(target_marker) else {
613            return;
614        };
615        let idx = offsets
616            .entries
617            .iter()
618            .position(|(m, _)| *m == target_marker)
619            .unwrap();
620        let is_last = idx + 1 == offsets.entries.len();
621        let pos = if after { end } else { start };
622        (pos, idx, is_last)
623    };
624
625    // Insertion strategy:
626    let (rope_inserted, marker_byte_start, new_entry_pos, shift_threshold, shift_delta) = if !after
627    {
628        // Before target: "\u{FFFC}\n" at target.byte_start
629        ("\u{FFFC}\n", insert_pos, target_idx, insert_pos, 4i32)
630    } else if !target_is_last {
631        // After target, with following entries: "\u{FFFC}\n"
632        // at target.byte_end. The TableAnchor sits where the
633        // next block USED to start; that following entry
634        // shifts by 4.
635        ("\u{FFFC}\n", insert_pos, target_idx + 1, insert_pos, 4i32)
636    } else {
637        // After target which is last: "\n\u{FFFC}" appended.
638        // TableAnchor's byte_start sits 1 past the original
639        // total (after the new `\n`).
640        (
641            "\n\u{FFFC}",
642            insert_pos + 1,
643            target_idx + 1,
644            insert_pos,
645            4i32,
646        )
647    };
648
649    // 1. Splice the literal bytes into the rope.
650    {
651        let mut rope = store.rope.write().unwrap();
652        let char_idx = rope.byte_to_char(insert_pos as usize);
653        rope.insert(char_idx, rope_inserted);
654    }
655
656    // 2. Shift entries past the insertion point. Use shift_after
657    //    BEFORE inserting our new entry so we don't double-shift.
658    store
659        .block_offsets
660        .write()
661        .unwrap()
662        .shift_after(shift_threshold, shift_delta);
663
664    // 3. Register the TableAnchor at the resolved byte position
665    //    and the resolved Vec position.
666    store.block_offsets.write().unwrap().insert_at(
667        new_entry_pos,
668        OffsetMarker::TableAnchor(table_id),
669        marker_byte_start,
670    );
671
672    // Note: SENTINEL_BYTES is part of `shift_delta` (3 for the
673    // sentinel + 1 for the `\n`).
674    let _ = SENTINEL;
675    let _ = SENTINEL_BYTES;
676}
677
678/// Append a U+FFFC table-anchor sentinel at the end of the rope and
679/// register a `TableAnchor(table_id)` marker. If the rope is already
680/// non-empty, prepends a `\n` boundary so the sentinel doesn't run
681/// into the previous entry's content.
682///
683/// Used by import paths (`import_html_uc`, `import_markdown_uc`)
684/// that process the document linearly and append entities as they
685/// encounter them, rather than inserting relative to an existing
686/// target block.
687pub fn rope_append_table_anchor(store: &Store, table_id: EntityId) {
688    let (anchor_byte_start, new_total) = {
689        let mut rope = store.rope.write().unwrap();
690        let was_empty = rope.len_bytes() == 0;
691        let char_end = rope.len_chars();
692        let to_insert = if was_empty { "\u{FFFC}" } else { "\n\u{FFFC}" };
693        rope.insert(char_end, to_insert);
694        let new_total = rope.len_bytes() as u32;
695        // Sentinel is 3 bytes; if a `\n` was prepended that's 1 byte
696        // before the sentinel.
697        let anchor_byte_start = new_total - 3;
698        (anchor_byte_start, new_total)
699    };
700
701    let mut offsets = store.block_offsets.write().unwrap();
702    offsets.push(OffsetMarker::TableAnchor(table_id), anchor_byte_start);
703    offsets.set_total_bytes(new_total);
704}
705
706/// Remove a TableAnchor sentinel from the rope, undoing the effect
707/// of `rope_insert_table_anchor`. Looks up the anchor's byte range
708/// (always 3 bytes for the U+FFFC plus 1 byte of inter-marker `\n`
709/// either before or after, depending on what's adjacent), removes
710/// those 4 bytes from the rope, drops the entry, shifts trailing
711/// entries by -4.
712///
713/// No-op if no TableAnchor for `table_id` exists.
714pub fn rope_remove_table_anchor(store: &Store, table_id: EntityId) {
715    let anchor_marker = OffsetMarker::TableAnchor(table_id);
716    let (anchor_byte_start, anchor_idx, anchor_is_last, has_predecessor) = {
717        let offsets = store.block_offsets.read().unwrap();
718        let Some((start, _end)) = offsets.range_of(anchor_marker) else {
719            return;
720        };
721        let idx = offsets
722            .entries
723            .iter()
724            .position(|(m, _)| *m == anchor_marker)
725            .unwrap();
726        let is_last = idx + 1 == offsets.entries.len();
727        let has_pred = idx > 0;
728        (start, idx, is_last, has_pred)
729    };
730
731    // Symmetric to insert_table_anchor. The 4 bytes to remove are:
732    // - if anchor is last: [byte_start - 1 .. byte_start + 3) — the
733    //   preceding `\n` + the 3-byte sentinel
734    // - otherwise: [byte_start .. byte_start + 4) — the sentinel
735    //   + the following `\n`
736    let (remove_start, remove_end) = if anchor_is_last && has_predecessor {
737        (anchor_byte_start - 1, anchor_byte_start + 3)
738    } else {
739        (anchor_byte_start, anchor_byte_start + 4)
740    };
741
742    {
743        let mut rope = store.rope.write().unwrap();
744        let char_start = rope.byte_to_char(remove_start as usize);
745        let char_end = rope.byte_to_char(remove_end as usize);
746        rope.remove(char_start..char_end);
747    }
748    {
749        let mut offsets = store.block_offsets.write().unwrap();
750        offsets.remove_at(anchor_idx);
751    }
752    store
753        .block_offsets
754        .write()
755        .unwrap()
756        .shift_after(remove_start, -4);
757}
758
759/// Remove a registered block from the rope: drops its content bytes
760/// plus one boundary `\n` (the one after, if the block has a
761/// successor; the one before, if it's the last entry), removes the
762/// entry from the index, and shifts trailing entries by the negative
763/// byte delta.
764///
765/// No-op if `block_id` is not in the index. No-op for the special
766/// case of a single-block document being asked to remove its sole
767/// block (we'd produce an empty rope but the block itself is being
768/// cascaded by the caller).
769pub fn rope_remove_block(store: &Store, block_id: EntityId) {
770    let block_marker = OffsetMarker::Block(block_id);
771    let (block_start, block_end, idx, is_last, has_pred) = {
772        let offsets = store.block_offsets.read().unwrap();
773        let Some((start, end)) = offsets.range_of(block_marker) else {
774            return;
775        };
776        let idx = offsets
777            .entries
778            .iter()
779            .position(|(m, _)| *m == block_marker)
780            .unwrap();
781        let is_last = idx + 1 == offsets.entries.len();
782        let has_pred = idx > 0;
783        (start, end, idx, is_last, has_pred)
784    };
785
786    // Determine the byte range to delete:
787    // - if there's a successor: [block_start..block_end) — the
788    //   block's content INCLUDING its trailing boundary `\n`
789    //   (which is the byte at block_end - 1)
790    // - if last and has predecessor: [block_start - 1..block_end)
791    //   — also delete the LEADING boundary `\n` that the previous
792    //   entry placed before us
793    // - if last and no predecessor (sole entry): just delete
794    //   [block_start..block_end) (no boundary `\n` exists)
795    let (remove_start, remove_end) = if is_last && has_pred {
796        (block_start.saturating_sub(1), block_end)
797    } else {
798        (block_start, block_end)
799    };
800    if remove_end <= remove_start {
801        // Drop the entry only; nothing to remove from the rope.
802        store.block_offsets.write().unwrap().remove_at(idx);
803        return;
804    }
805    let deleted_bytes = remove_end - remove_start;
806
807    {
808        let mut rope = store.rope.write().unwrap();
809        let char_start = rope.byte_to_char(remove_start as usize);
810        let char_end = rope.byte_to_char(remove_end as usize);
811        rope.remove(char_start..char_end);
812    }
813    store.block_offsets.write().unwrap().remove_at(idx);
814    // Shift entries STRICTLY PAST the removed range. Using `remove_end` as
815    // the threshold (rather than `remove_start`) keeps an empty predecessor
816    // whose byte_start equals `remove_start` (the leading boundary `\n` we
817    // just deleted) in place — its content position is unchanged, only its
818    // trailing boundary is gone. `total_bytes` decreases by `deleted_bytes`
819    // regardless of threshold.
820    store
821        .block_offsets
822        .write()
823        .unwrap()
824        .shift_after(remove_end, -(deleted_bytes as i32));
825}
826
827/// Replace the entire content of a registered block in the rope with
828/// `new_text`. Preserves the block's `byte_start` and its trailing
829/// boundary `\n` (if any); subsequent entries shift by the net
830/// length delta.
831///
832/// Used by use cases that compute a block's final content as a string
833/// and want to push that content to the rope in one shot — e.g. the
834/// block-splitting branches of `insert_html_at_position_uc` and
835/// `insert_markdown_at_position_uc`, where each affected block
836/// (head, tail, mid-replacement) gets a single new value.
837///
838/// No-op if `block_id` is not in the index.
839pub fn rope_replace_block_content(store: &Store, block_id: EntityId, new_text: &str) {
840    let (block_byte_start, content_bytes) = {
841        let offsets = store.block_offsets.read().unwrap();
842        let Some((start, end)) = offsets.range_of_block(block_id) else {
843            return;
844        };
845        let total = offsets.total_bytes();
846        // `range_of` extends to the next entry's `byte_start` (or to
847        // `total_bytes`). If there's a following entry, the byte at
848        // `end - 1` is the inter-block boundary `\n` that belongs to
849        // the boundary between this block and the next, not to this
850        // block's content.
851        let has_trailing_boundary = end < total;
852        let content_bytes = if has_trailing_boundary {
853            end - start - 1
854        } else {
855            end - start
856        };
857        (start, content_bytes)
858    };
859
860    let new_bytes = new_text.len() as u32;
861    if content_bytes == 0 && new_bytes == 0 {
862        return;
863    }
864    let delta = new_bytes as i32 - content_bytes as i32;
865
866    // Splice [block_byte_start..block_byte_start + content_bytes)
867    // with `new_text`.
868    {
869        let mut rope = store.rope.write().unwrap();
870        let char_start = rope.byte_to_char(block_byte_start as usize);
871        if content_bytes > 0 {
872            let char_end = rope.byte_to_char((block_byte_start + content_bytes) as usize);
873            rope.remove(char_start..char_end);
874        }
875        if new_bytes > 0 {
876            rope.insert(char_start, new_text);
877        }
878    }
879
880    if delta != 0 {
881        // Shift entries that sit strictly past this block's start
882        // (i.e. the trailing boundary and everything after).
883        store
884            .block_offsets
885            .write()
886            .unwrap()
887            .shift_after(block_byte_start + 1, delta);
888    }
889}
890
891/// Delete bytes `[byte_start_in_block..byte_end_in_block)` from inside
892/// the block identified by `block_id`. Shifts subsequent block offsets
893/// by the deleted byte length. No-op for blocks not in the index.
894pub fn rope_delete_in_block(
895    store: &Store,
896    block_id: EntityId,
897    byte_start_in_block: u32,
898    byte_end_in_block: u32,
899) {
900    if byte_end_in_block <= byte_start_in_block {
901        return;
902    }
903    let deleted_bytes = byte_end_in_block - byte_start_in_block;
904    let block_byte_start = {
905        let offsets = store.block_offsets.read().unwrap();
906        let Some((start, _end)) = offsets.range_of_block(block_id) else {
907            return;
908        };
909        start
910    };
911    let rope_byte_start = block_byte_start + byte_start_in_block;
912    let rope_byte_end = block_byte_start + byte_end_in_block;
913    {
914        let mut rope = store.rope.write().unwrap();
915        let char_start = rope.byte_to_char(rope_byte_start as usize);
916        let char_end = rope.byte_to_char(rope_byte_end as usize);
917        rope.remove(char_start..char_end);
918    }
919    store
920        .block_offsets
921        .write()
922        .unwrap()
923        .shift_after(block_byte_start + 1, -(deleted_bytes as i32));
924}
925
926/// Recompute `Frame.byte_range` for every frame in `store.frames`
927/// based on current `block_offsets` and the frame tree structure.
928/// Plan §1.6 invariant: each frame's byte_range is the (min_start,
929/// max_end) over all its descendant blocks, sub-frames, and table
930/// anchors+cells.
931///
932/// Call this after any mutation that affects rope byte positions.
933/// O(F + B) where F = frames, B = blocks in the document.
934pub fn recompute_all_frame_byte_ranges(store: &Store) {
935    let frame_ids: Vec<EntityId> = {
936        let frames = store.frames.read().unwrap();
937        frames.keys().copied().collect()
938    };
939    for fid in frame_ids {
940        let new_range = compute_frame_byte_range_recursive(store, fid);
941        let mut frames = store.frames.write().unwrap();
942        if let Some(f) = frames.get(&fid).cloned()
943            && f.byte_range != new_range
944        {
945            let mut updated = f;
946            updated.byte_range = new_range;
947            frames.insert(fid, updated);
948        }
949    }
950}
951
952fn compute_frame_byte_range_recursive(store: &Store, frame_id: EntityId) -> (u32, u32) {
953    let mut bounds: Option<(u32, u32)> = None;
954    walk_frame_bounds(store, frame_id, &mut bounds);
955    bounds.unwrap_or((0, 0))
956}
957
958fn walk_frame_bounds(store: &Store, frame_id: EntityId, bounds: &mut Option<(u32, u32)>) {
959    fn merge(bounds: &mut Option<(u32, u32)>, s: u32, e: u32) {
960        *bounds = Some(match *bounds {
961            None => (s, e),
962            Some((min, max)) => (min.min(s), max.max(e)),
963        });
964    }
965
966    let (blocks, child_order, table_id) = {
967        let frames = store.frames.read().unwrap();
968        let Some(f) = frames.get(&frame_id) else {
969            return;
970        };
971        (f.blocks.clone(), f.child_order.clone(), f.table)
972    };
973
974    {
975        let offsets = store.block_offsets.read().unwrap();
976        for bid in &blocks {
977            if let Some((s, e)) = offsets.range_of_block(*bid) {
978                merge(bounds, s, e);
979            }
980        }
981        if let Some(tid) = table_id
982            && let Some((s, e)) = offsets.range_of(OffsetMarker::TableAnchor(tid))
983        {
984            merge(bounds, s, e);
985        }
986    }
987
988    for entry in &child_order {
989        if *entry < 0 {
990            walk_frame_bounds(store, (-*entry) as EntityId, bounds);
991        }
992    }
993
994    if let Some(tid) = table_id {
995        let cell_ids: Vec<EntityId> = {
996            let tables = store.tables.read().unwrap();
997            tables
998                .get(&tid)
999                .map(|t| t.cells.clone())
1000                .unwrap_or_default()
1001        };
1002        for cell_id in &cell_ids {
1003            let cell_frame_id = {
1004                let cells = store.table_cells.read().unwrap();
1005                cells.get(cell_id).and_then(|c| c.cell_frame)
1006            };
1007            if let Some(cfid) = cell_frame_id {
1008                walk_frame_bounds(store, cfid, bounds);
1009            }
1010        }
1011    }
1012}