common/database/rope_helpers.rs
1//! Helpers for writing the global character rope from use cases.
2//!
3//! Each helper mutates `store.rope` and `store.block_offsets`
4//! together so callers can stay oblivious to the underlying layout.
5//! Read helpers (`block_content_via_store`) source content from the
6//! rope and return empty when a block is not yet registered in the
7//! offset index.
8
9use crate::database::Store;
10use crate::database::block_offset_index::OffsetMarker;
11use crate::entities::Block;
12use crate::types::EntityId;
13
14/// Read a block's content from the global rope via `block_offsets`,
15/// stripping the trailing `\n` boundary that `range_of` includes for
16/// non-last entries. Returns an empty string if the block isn't
17/// registered in the offset index (e.g. a freshly-created block that
18/// hasn't been spliced into the rope yet — `setup_with_text` test
19/// docs use this path).
20pub fn block_content_via_store(block: &Block, store: &Store) -> String {
21 let offsets = store.block_offsets.read();
22 let marker = OffsetMarker::Block(block.id);
23 let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
24 return String::new();
25 };
26 // Drop the trailing inter-block boundary `\n` ONLY when this block
27 // has a successor entry — that one byte is the boundary `\n` between
28 // this block and the next. The last entry has no trailing boundary;
29 // any final `\n` is real content.
30 let content_end = if has_successor && be > bs { be - 1 } else { be };
31 drop(offsets);
32 let rope = store.rope.read();
33 rope.byte_slice(bs as usize..content_end as usize)
34 .to_string()
35}
36
37/// Logical character count of a block — what the old
38/// `Block.text_length` field used to cache. Image anchors are stored as
39/// `\u{FFFC}` (one char, three bytes) inside the rope content, so the
40/// char count already covers them. Returns 0 for blocks not registered
41/// in the offset index.
42///
43/// O(log n) via `ropey::Rope::byte_to_char` — does NOT materialize the
44/// block's text into a String. Replaces the prior O(L) implementation
45/// that counted chars by walking UTF-8 over a cloned slice.
46pub fn block_char_length(block: &Block, store: &Store) -> i64 {
47 let offsets = store.block_offsets.read();
48 let marker = OffsetMarker::Block(block.id);
49 let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
50 return 0;
51 };
52 let content_end_bytes = if has_successor && be > bs { be - 1 } else { be };
53 drop(offsets);
54 let rope = store.rope.read();
55 let char_start = rope.byte_to_char(bs as usize);
56 let char_end = rope.byte_to_char(content_end_bytes as usize);
57 (char_end - char_start) as i64
58}
59
60/// Return the absolute character position of a block's start in the
61/// document, derived from the rope via `BlockOffsetIndex`.
62///
63/// O(log n): one `range_of_block` lookup + one `byte_to_char` conversion.
64/// Falls back to `block.document_position` for blocks not registered in
65/// the index — blocks under non-top-level frames (`insert_frame_uc` only
66/// mirrors top-level frames). Table-cell blocks ARE registered: they are
67/// mirrored inline into the rope in document order. The stored field is
68/// set authoritatively by the non-flow paths and stays correct for them
69/// across main-flow edits.
70pub fn block_document_position(block: &Block, store: &Store) -> i64 {
71 let offsets = store.block_offsets.read();
72 let Some((byte_start, _)) = offsets.range_of_block(block.id) else {
73 return block.document_position;
74 };
75 drop(offsets);
76 let rope = store.rope.read();
77 rope.byte_to_char(byte_start as usize) as i64
78}
79
80/// Whether the rope's char-position space matches the user-visible
81/// flow positions that `Block.document_position` is computed against.
82///
83/// They match when every block is mirrored to the rope as a contiguous
84/// run. They DON'T match when:
85/// 1. The document contains tables — cell content sits at separate rope
86/// byte ranges (plan §1.6), so the rope is missing the cells'
87/// flow-position contribution.
88/// 2. The document contains sub-frames whose blocks aren't mirrored —
89/// `insert_frame_uc` only mirrors top-level frames.
90///
91/// When this returns `true`, readers can derive `document_position`
92/// directly from the rope (O(log n)) and the use-case-side
93/// position-refresh loops can be skipped. When it returns `false`,
94/// readers must consult the maintained `Block.document_position`
95/// stored field, and the loops are required to keep it correct.
96pub fn rope_positions_match_flow(store: &Store) -> bool {
97 let offsets = store.block_offsets.read();
98 // The rope is authoritative as long as EVERY block is mirrored into it.
99 // Tables no longer disqualify it: cell content is mirrored inline, in
100 // document order, and the table itself occupies a 1-char anchor sentinel
101 // — so the rope's char space matches the user-visible flow order. (The
102 // flow snapshot derives its block positions from this same rope space,
103 // so the two agree by construction.) Only count Block markers; the
104 // TableAnchor sentinel entries are not blocks.
105 let indexed_block_count = offsets.entries.iter().filter(|(m, _)| m.is_block()).count();
106 drop(offsets);
107 let total_block_count = store.blocks.read().len();
108 indexed_block_count == total_block_count
109}
110
111/// Locate which block contains a given absolute char position in the
112/// document, returning `(block_id, char_offset_in_block, block_char_start)`
113/// in O(log n) using the rope + `BlockOffsetIndex` instead of an O(N)
114/// linear walk of all blocks.
115///
116/// Replaces the per-keystroke hot path in editing use cases
117/// (`find_block_at_position_sequential`) which fetched every block
118/// + called `block_char_length` per block. For an N-block document
119/// each editor keystroke now costs O(log n) lookups instead of O(N).
120///
121/// Returns `None` only when some block is unmirrored (a sub-frame
122/// inserted with a parent — `insert_frame_uc` mirrors only top-level
123/// frames), where the rope's char space diverges from flow order and
124/// callers must fall back to the slow per-block walk. Tables are fine:
125/// cell content is mirrored inline in document order and each table is a
126/// 1-char anchor sentinel, so byte→block lookup resolves correctly.
127///
128/// `position` past the document end clamps to the last block's
129/// end-of-content.
130pub fn find_block_at_char_position(store: &Store, position: i64) -> Option<(EntityId, i64, i64)> {
131 // Fast path is only valid when EVERY block in the document is
132 // mirrored to the rope. Disqualifying cases:
133 //
134 // 1. Documents containing tables — cell content sits at separate
135 // rope byte ranges (plan §1.6) so byte→block lookup finds the
136 // wrong block for cursor positions inside cells.
137 //
138 // 2. Documents containing sub-frames inserted with a parent —
139 // `insert_frame_uc` currently only mirrors top-level frames to
140 // the rope (parent=None case), leaving sub-frame blocks
141 // unregistered in the offset index. Detect by comparing block
142 // counts: if the rope index has fewer Block markers than the
143 // store has Block entities, some are unmirrored.
144 let offsets = store.block_offsets.read();
145 // Valid as long as every block is mirrored to the rope. Tables are fine:
146 // cell content is mirrored inline in document order and the table is a
147 // 1-char anchor sentinel, so byte→block lookup resolves correctly for any
148 // non-sentinel position. A position that lands exactly on the sentinel
149 // resolves to a TableAnchor marker, where `as_block()` returns None and
150 // the caller falls back to the slow walk. Only count Block markers.
151 let indexed_block_count = offsets.entries.iter().filter(|(m, _)| m.is_block()).count();
152 let total_block_count = store.blocks.read().len();
153 if indexed_block_count != total_block_count {
154 return None;
155 }
156 drop(offsets);
157
158 let rope = store.rope.read();
159 let total_chars = rope.len_chars() as i64;
160 let pos_clamped = position.clamp(0, total_chars);
161 let abs_byte = rope.char_to_byte(pos_clamped as usize);
162 drop(rope);
163
164 let offsets = store.block_offsets.read();
165 let block_id = offsets.marker_at_byte(abs_byte as u32)?.as_block()?;
166 let (bs, be, has_successor) = offsets.range_with_successor(OffsetMarker::Block(block_id))?;
167 let content_end = if has_successor && be > bs { be - 1 } else { be };
168
169 drop(offsets);
170
171 let rope = store.rope.read();
172 let block_char_start = rope.byte_to_char(bs as usize) as i64;
173 // Clamp the cursor's byte to the block's content area (not into the
174 // trailing `\n` boundary if any).
175 let byte_for_char = std::cmp::min(abs_byte, content_end as usize);
176 let abs_char = rope.byte_to_char(byte_for_char) as i64;
177 let char_in_block = abs_char - block_char_start;
178
179 // NOTE: separator semantics differ across use cases. Callers like
180 // `get_block_at_position_uc` interpret "position at the end of a
181 // non-empty block with a successor" as "belongs to the next
182 // block"; callers like `insert_text_uc` want the previous block
183 // with offset == block_char_len. This helper returns the
184 // previous-block answer (the simpler semantic); use-case-specific
185 // advance logic lives at the call site.
186 let _ = has_successor;
187
188 Some((block_id, char_in_block, block_char_start))
189}
190
191/// Convert an in-block char offset to an in-block byte offset using the
192/// rope's index. Both inputs and outputs are relative to the start of
193/// the block's content (NOT absolute rope positions). The char offset
194/// is clamped to the block's logical length so callers don't need to
195/// pre-validate.
196///
197/// O(log n) via `ropey::Rope::char_to_byte` — replaces the O(L)
198/// "materialize block text + walk char_indices" pattern that
199/// `set_text_format_uc` / `merge_text_format_uc` used. Returns
200/// `(byte_offset_in_block, content_byte_len)` so callers can also
201/// pass the content length to `debug_assert_well_formed` without
202/// materializing the text.
203///
204/// Returns `(0, 0)` for blocks not registered in the offset index.
205pub fn block_char_to_byte_in_block(
206 store: &Store,
207 block_id: EntityId,
208 char_offset: usize,
209) -> (u32, usize) {
210 let offsets = store.block_offsets.read();
211 let marker = OffsetMarker::Block(block_id);
212 let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
213 return (0, 0);
214 };
215 let content_end_bytes = if has_successor && be > bs { be - 1 } else { be };
216 let content_byte_len = (content_end_bytes - bs) as usize;
217 drop(offsets);
218
219 let rope = store.rope.read();
220 let block_char_start = rope.byte_to_char(bs as usize);
221 let block_char_end = rope.byte_to_char(content_end_bytes as usize);
222 let block_char_len = block_char_end - block_char_start;
223
224 // Clamp char_offset to block's char length.
225 let clamped = std::cmp::min(char_offset, block_char_len);
226 let abs_byte = rope.char_to_byte(block_char_start + clamped);
227 let byte_in_block = (abs_byte - bs as usize) as u32;
228 (byte_in_block, content_byte_len)
229}
230
231/// Fast-path full-document plain text: returns `Some(rope.to_string())`
232/// iff the document is in the canonical flat layout — no table anchors
233/// in the offset index, single top-level frame. In that case the
234/// rope's byte order is the same as the document-flow order, so one
235/// `to_string()` allocation replaces the O(N) per-block walk +
236/// per-block `Cow<str>` materialization that `build_full_text` /
237/// `export_plain_text` would otherwise do.
238///
239/// Returns `None` for documents containing tables or nested frames —
240/// those require the per-frame, per-block traversal because table
241/// cell content lives in separate byte ranges later in the rope
242/// (plan §1.6), not interleaved with the parent frame's bytes.
243///
244/// `top_frame_count` is the caller-known number of top-level frames
245/// (typically obtained from `Document.frames.len()`). Callers
246/// already have this value and pass it in to avoid a redundant uow
247/// query.
248pub fn rope_flat_text_if_simple(store: &Store, top_frame_count: usize) -> Option<String> {
249 if top_frame_count != 1 {
250 return None;
251 }
252 let offsets = store.block_offsets.read();
253 let has_table = offsets
254 .entries
255 .iter()
256 .any(|(m, _)| matches!(m, OffsetMarker::TableAnchor(_)));
257 if has_table {
258 return None;
259 }
260 drop(offsets);
261 Some(store.rope.read().to_string())
262}
263
264/// Whole-document searchable text straight from the rope, valid whenever
265/// the rope's char-position space matches the user-visible flow order
266/// (`rope_positions_match_flow`).
267///
268/// Unlike `rope_flat_text_if_simple` this does NOT bail on tables or
269/// multiple top-level frames: table-cell content is mirrored inline into
270/// the rope in document order and each table occupies a 1-char anchor
271/// sentinel, so the rope already contains all searchable text — including
272/// cell text — at the same char offsets that match positions are reported
273/// in. Returns `None` only when some block is unmirrored (e.g. a sub-frame
274/// inserted with a parent), where the caller must fall back to the
275/// per-frame, per-block traversal.
276pub fn rope_full_text_if_flow_matches(store: &Store) -> Option<String> {
277 rope_positions_match_flow(store).then(|| store.rope.read().to_string())
278}
279
280/// Reset the rope to empty and clear `block_offsets`. Called by
281/// importers when they replace the entire document content.
282pub fn rope_reset(store: &Store) {
283 *store.rope.write() = ropey::Rope::new();
284 *store.block_offsets.write() = crate::database::block_offset_index::BlockOffsetIndex::new();
285}
286
287/// Append `text` to the end of the rope and register `block_id` at
288/// the byte position where the text starts. Returns that byte offset.
289///
290/// Callers are responsible for inserting an inter-block `\n`
291/// (`rope_insert_block_boundary`) before each block AFTER the first
292/// in a contiguous frame.
293pub fn rope_append_block(store: &Store, block_id: EntityId, text: &str) -> u32 {
294 let mut rope = store.rope.write();
295 let byte_start = rope.len_bytes() as u32;
296 let char_end = rope.len_chars();
297 rope.insert(char_end, text);
298 let new_total = rope.len_bytes() as u32;
299 drop(rope);
300
301 let mut offsets = store.block_offsets.write();
302 offsets.push_block(block_id, byte_start);
303 offsets.set_total_bytes(new_total);
304 byte_start
305}
306
307/// Insert `text` as a new block at `byte_pos` in the rope, prepending
308/// a `\n` boundary. Used by `insert_table_uc` to place cell blocks at
309/// the end of their containing top-level frame's range (plan §1.6),
310/// rather than always at rope end.
311///
312/// Total bytes inserted: `1 + text.len()`. The block's content
313/// occupies `[byte_pos + 1, byte_pos + 1 + text.len())`. The block
314/// entry is registered at `byte_pos + 1` in `block_offsets`.
315///
316/// Existing entries with `byte_start == byte_pos` (e.g. a previous
317/// empty block whose end coincides with this insertion point) are
318/// kept BEFORE the new entry in the Vec, since the inserted `\n`
319/// boundary belongs after them. Entries strictly past `byte_pos`
320/// shift forward by `(1 + text.len())` bytes.
321///
322/// When `byte_pos == total_bytes`, behaves like
323/// `rope_insert_block_boundary` followed by `rope_append_block`.
324pub fn rope_insert_block_at(store: &Store, byte_pos: u32, block_id: EntityId, text: &str) {
325 let delta = (1 + text.len()) as i32;
326 // Vec position: insert AFTER any entry at byte_pos itself
327 // (those represent earlier empty blocks whose `\n` boundary
328 // we are placing now). Only entries strictly past byte_pos
329 // come after our new entry in the Vec.
330 let new_entry_vec_pos = {
331 let offsets = store.block_offsets.read();
332 offsets
333 .entries
334 .iter()
335 .position(|(_, bs)| *bs > byte_pos)
336 .unwrap_or(offsets.entries.len())
337 };
338 {
339 let mut rope = store.rope.write();
340 let char_idx = rope.byte_to_char(byte_pos as usize);
341 let mut combined = String::with_capacity(1 + text.len());
342 combined.push('\n');
343 combined.push_str(text);
344 rope.insert(char_idx, &combined);
345 }
346 let mut offsets = store.block_offsets.write();
347 // Shift entries strictly past byte_pos. Entries AT byte_pos
348 // (the prior empty block) stay where they are — the new `\n`
349 // is conceptually "after" them.
350 offsets.shift_after(byte_pos + 1, delta);
351 offsets.insert_at(
352 new_entry_vec_pos,
353 OffsetMarker::Block(block_id),
354 byte_pos + 1,
355 );
356}
357
358/// Walks up `frame.parent_frame` to find the top-level ancestor of
359/// the given frame, then returns the end byte of that top-level
360/// frame's current rope range — i.e. the byte position where blocks
361/// belonging to that frame's subtree (e.g. table cells per plan §1.6)
362/// should be inserted so they land BEFORE any following top-level
363/// frame's content.
364///
365/// Reads `block_offsets`/`frames`/`tables`/`table_cells` directly, so
366/// the result is fresh even when `Frame.byte_range` has not yet been
367/// recomputed at commit time.
368pub fn top_level_frame_end_byte(store: &Store, frame_id: EntityId) -> u32 {
369 let top_id = {
370 let frames = store.frames.read();
371 let mut current = frame_id;
372 loop {
373 let Some(f) = frames.get(¤t) else {
374 return 0;
375 };
376 match f.parent_frame {
377 None => break current,
378 Some(p) => current = p,
379 }
380 }
381 };
382 let (_min, max) = compute_frame_byte_range_recursive(store, top_id);
383 max
384}
385
386/// Append a new empty block to the end of the rope, separating it
387/// from any prior content with a `\n` boundary (only if the rope is
388/// already non-empty). Registers `block_id` at the resulting byte
389/// position. Returns that byte position. Used when `insert_frame_uc`
390/// creates a new top-level frame with a single empty block.
391pub fn rope_append_empty_block(store: &Store, block_id: EntityId) -> u32 {
392 let was_empty = store.rope.read().len_bytes() == 0;
393 if !was_empty {
394 rope_insert_block_boundary(store);
395 }
396 let pos = store.rope.read().len_bytes() as u32;
397 let mut offsets = store.block_offsets.write();
398 offsets.push_block(block_id, pos);
399 offsets.set_total_bytes(pos);
400 pos
401}
402
403/// Append a single `\n` inter-block boundary character to the end of
404/// the rope. Does NOT register a block — this is the sentinel between
405/// two adjacent blocks within the same frame (plan §1.4).
406pub fn rope_insert_block_boundary(store: &Store) {
407 let mut rope = store.rope.write();
408 let char_end = rope.len_chars();
409 rope.insert(char_end, "\n");
410 let new_total = rope.len_bytes() as u32;
411 drop(rope);
412
413 store.block_offsets.write().set_total_bytes(new_total);
414}
415
416/// Insert `text` at `byte_offset_in_block` inside the block identified
417/// by `block_id`. Looks up the block's start in the rope via
418/// `block_offsets.range_of()`, splices into the rope, and shifts
419/// subsequent block offsets by the inserted byte length.
420///
421/// Silently no-ops if the block is not registered in the offset index
422/// (this can happen for blocks whose content lives outside the global
423/// rope, e.g. table cells until step 5.5).
424pub fn rope_insert_in_block(
425 store: &Store,
426 block_id: EntityId,
427 byte_offset_in_block: u32,
428 text: &str,
429) {
430 let inserted_bytes = text.len() as u32;
431 if inserted_bytes == 0 {
432 return;
433 }
434 let block_byte_start = {
435 let offsets = store.block_offsets.read();
436 let Some((start, _end)) = offsets.range_of_block(block_id) else {
437 return;
438 };
439 start
440 };
441 let rope_byte = block_byte_start + byte_offset_in_block;
442 {
443 let mut rope = store.rope.write();
444 let char_idx = rope.byte_to_char(rope_byte as usize);
445 rope.insert(char_idx, text);
446 }
447 // Shift entries past this block by inserted_bytes. Threshold
448 // is one byte past block_byte_start so the current block's own
449 // entry isn't moved.
450 store
451 .block_offsets
452 .write()
453 .shift_after(block_byte_start + 1, inserted_bytes as i32);
454}
455
456/// Split an existing block in the rope at `byte_offset_in_block`:
457/// - inserts a `\n` inter-block boundary at the absolute byte position
458/// `block_start + byte_offset_in_block` in the rope
459/// - shifts entries past that position by +1 byte
460/// - inserts a new entry for `new_block_id` at
461/// `block_start + byte_offset_in_block + 1` (right after the newline),
462/// placed immediately after the original block in the entries Vec
463///
464/// `byte_offset_in_block` may be 0 (split before first char of block,
465/// i.e. insert empty block before this one) or equal to the block's
466/// byte length (split after last char, i.e. insert empty block after).
467pub fn rope_split_block(
468 store: &Store,
469 current_block_id: EntityId,
470 byte_offset_in_block: u32,
471 new_block_id: EntityId,
472) {
473 let current_marker = OffsetMarker::Block(current_block_id);
474 let (block_start, current_idx) = {
475 let offsets = store.block_offsets.read();
476 let Some((start, _end)) = offsets.range_of(current_marker) else {
477 return;
478 };
479 let idx = offsets
480 .entries
481 .iter()
482 .position(|(m, _)| *m == current_marker)
483 .unwrap();
484 (start, idx)
485 };
486 let split_byte = block_start + byte_offset_in_block;
487
488 // 1. Insert the `\n` boundary at the split point.
489 {
490 let mut rope = store.rope.write();
491 let char_idx = rope.byte_to_char(split_byte as usize);
492 rope.insert(char_idx, "\n");
493 }
494
495 // 2. Shift entries past the split (and total_bytes) by +1.
496 // Threshold > split_byte so the new entry we insert next
497 // isn't double-shifted.
498 store.block_offsets.write().shift_after(split_byte + 1, 1);
499
500 // 3. Register the new block at `split_byte + 1`, immediately
501 // after the original in the entries Vec.
502 store.block_offsets.write().insert_at(
503 current_idx + 1,
504 OffsetMarker::Block(new_block_id),
505 split_byte + 1,
506 );
507}
508
509/// Merge `start_block` and `end_block` by deleting the rope range
510/// `[start_block.start + byte_so .. end_block.start + byte_eo)` — i.e.
511/// the suffix of `start_block`, every block between (and their
512/// boundary newlines), and the prefix of `end_block`. Removes the
513/// index entries for every block strictly between `start_block` and
514/// `end_block` (inclusive of `end_block` itself); the surviving
515/// content lives in `start_block`. Shifts any blocks past `end_block`
516/// by the negative delta.
517///
518/// No-op if `start_block` is not in the index. Skipped for any
519/// intermediate block id whose range is missing from the index (e.g.
520/// table cells until step 5.5e).
521pub fn rope_merge_block_range(
522 store: &Store,
523 start_block_id: EntityId,
524 byte_so_in_start: u32,
525 end_block_id: EntityId,
526 byte_eo_in_end: u32,
527) {
528 let start_marker = OffsetMarker::Block(start_block_id);
529 let end_marker = OffsetMarker::Block(end_block_id);
530 let (start_block_byte, end_block_byte, start_idx, end_idx) = {
531 let offsets = store.block_offsets.read();
532 let Some((sb, _)) = offsets.range_of(start_marker) else {
533 return;
534 };
535 let Some((eb, _)) = offsets.range_of(end_marker) else {
536 return;
537 };
538 let si = offsets
539 .entries
540 .iter()
541 .position(|(m, _)| *m == start_marker)
542 .unwrap();
543 let ei = offsets
544 .entries
545 .iter()
546 .position(|(m, _)| *m == end_marker)
547 .unwrap();
548 (sb, eb, si, ei)
549 };
550 if end_idx <= start_idx {
551 return;
552 }
553
554 let delete_start = start_block_byte + byte_so_in_start;
555 let delete_end = end_block_byte + byte_eo_in_end;
556 if delete_end <= delete_start {
557 return;
558 }
559 let deleted_bytes = delete_end - delete_start;
560
561 // 1. Remove the rope range.
562 {
563 let mut rope = store.rope.write();
564 let char_start = rope.byte_to_char(delete_start as usize);
565 let char_end = rope.byte_to_char(delete_end as usize);
566 rope.remove(char_start..char_end);
567 }
568
569 // 2. Remove block_offsets entries for [start_idx+1 ..= end_idx].
570 {
571 let mut offsets = store.block_offsets.write();
572 offsets.drain_inclusive(start_idx + 1, end_idx);
573 }
574
575 // 3. Shift any remaining entries past the deletion by -deleted_bytes.
576 // Threshold > delete_start because start_block's own entry
577 // sits at delete_start - byte_so_in_start (≤ delete_start)
578 // and must not move.
579 store
580 .block_offsets
581 .write()
582 .shift_after(delete_start + 1, -(deleted_bytes as i32));
583}
584
585/// Insert a U+FFFC OBJECT REPLACEMENT CHARACTER sentinel in the rope
586/// at the table-anchor position, registering a `TableAnchor(table_id)`
587/// marker in the offset index (plan §1.6).
588///
589/// `target_block_id` is the block in the parent frame that the table
590/// is adjacent to. `after` controls whether the table goes BEFORE
591/// (`after = false`) or AFTER the target block.
592///
593/// The 3-byte sentinel is paired with an inter-marker `\n`:
594/// - `after = false`: inserts `\u{FFFC}\n` at `target.byte_start`
595/// - `after = true`, target is NOT the last entry: inserts
596/// `\u{FFFC}\n` at `target.byte_end` (between target's trailing
597/// `\n` and the next entry)
598/// - `after = true`, target IS the last entry: inserts `\n\u{FFFC}`
599/// at `target.byte_end` (rope now ends with the sentinel)
600///
601/// NOTE: cell-internal content is not yet routed through the rope —
602/// the rope reflects table *presence* (3-byte sentinel) only.
603/// Routing cell content is deferred to plan §1.6's `Frame.byte_range`
604/// model.
605///
606/// No-op if `target_block_id` is not in the index.
607pub fn rope_insert_table_anchor(
608 store: &Store,
609 table_id: EntityId,
610 target_block_id: EntityId,
611 after: bool,
612) {
613 const SENTINEL: &str = "\u{FFFC}"; // 3 bytes
614 const SENTINEL_BYTES: u32 = 3;
615
616 let (insert_pos, target_idx, target_is_last) = {
617 let offsets = store.block_offsets.read();
618 let target_marker = OffsetMarker::Block(target_block_id);
619 let Some((start, end)) = offsets.range_of(target_marker) else {
620 return;
621 };
622 let idx = offsets
623 .entries
624 .iter()
625 .position(|(m, _)| *m == target_marker)
626 .unwrap();
627 let is_last = idx + 1 == offsets.entries.len();
628 let pos = if after { end } else { start };
629 (pos, idx, is_last)
630 };
631
632 // Insertion strategy:
633 let (rope_inserted, marker_byte_start, new_entry_pos, shift_threshold, shift_delta) = if !after
634 {
635 // Before target: "\u{FFFC}\n" at target.byte_start
636 ("\u{FFFC}\n", insert_pos, target_idx, insert_pos, 4i32)
637 } else if !target_is_last {
638 // After target, with following entries: "\u{FFFC}\n"
639 // at target.byte_end. The TableAnchor sits where the
640 // next block USED to start; that following entry
641 // shifts by 4.
642 ("\u{FFFC}\n", insert_pos, target_idx + 1, insert_pos, 4i32)
643 } else {
644 // After target which is last: "\n\u{FFFC}" appended.
645 // TableAnchor's byte_start sits 1 past the original
646 // total (after the new `\n`).
647 (
648 "\n\u{FFFC}",
649 insert_pos + 1,
650 target_idx + 1,
651 insert_pos,
652 4i32,
653 )
654 };
655
656 // 1. Splice the literal bytes into the rope.
657 {
658 let mut rope = store.rope.write();
659 let char_idx = rope.byte_to_char(insert_pos as usize);
660 rope.insert(char_idx, rope_inserted);
661 }
662
663 // 2. Shift entries past the insertion point. Use shift_after
664 // BEFORE inserting our new entry so we don't double-shift.
665 store
666 .block_offsets
667 .write()
668 .shift_after(shift_threshold, shift_delta);
669
670 // 3. Register the TableAnchor at the resolved byte position
671 // and the resolved Vec position.
672 store.block_offsets.write().insert_at(
673 new_entry_pos,
674 OffsetMarker::TableAnchor(table_id),
675 marker_byte_start,
676 );
677
678 // Note: SENTINEL_BYTES is part of `shift_delta` (3 for the
679 // sentinel + 1 for the `\n`).
680 let _ = SENTINEL;
681 let _ = SENTINEL_BYTES;
682}
683
684/// Append a U+FFFC table-anchor sentinel at the end of the rope and
685/// register a `TableAnchor(table_id)` marker. If the rope is already
686/// non-empty, prepends a `\n` boundary so the sentinel doesn't run
687/// into the previous entry's content.
688///
689/// Used by import paths (`import_html_uc`, `import_markdown_uc`)
690/// that process the document linearly and append entities as they
691/// encounter them, rather than inserting relative to an existing
692/// target block.
693pub fn rope_append_table_anchor(store: &Store, table_id: EntityId) {
694 let (anchor_byte_start, new_total) = {
695 let mut rope = store.rope.write();
696 let was_empty = rope.len_bytes() == 0;
697 let char_end = rope.len_chars();
698 let to_insert = if was_empty { "\u{FFFC}" } else { "\n\u{FFFC}" };
699 rope.insert(char_end, to_insert);
700 let new_total = rope.len_bytes() as u32;
701 // Sentinel is 3 bytes; if a `\n` was prepended that's 1 byte
702 // before the sentinel.
703 let anchor_byte_start = new_total - 3;
704 (anchor_byte_start, new_total)
705 };
706
707 let mut offsets = store.block_offsets.write();
708 offsets.push(OffsetMarker::TableAnchor(table_id), anchor_byte_start);
709 offsets.set_total_bytes(new_total);
710}
711
712/// Remove a TableAnchor sentinel from the rope, undoing the effect
713/// of `rope_insert_table_anchor`. Looks up the anchor's byte range
714/// (always 3 bytes for the U+FFFC plus 1 byte of inter-marker `\n`
715/// either before or after, depending on what's adjacent), removes
716/// those 4 bytes from the rope, drops the entry, shifts trailing
717/// entries by -4.
718///
719/// No-op if no TableAnchor for `table_id` exists.
720pub fn rope_remove_table_anchor(store: &Store, table_id: EntityId) {
721 let anchor_marker = OffsetMarker::TableAnchor(table_id);
722 let (anchor_byte_start, anchor_idx, anchor_is_last, has_predecessor) = {
723 let offsets = store.block_offsets.read();
724 let Some((start, _end)) = offsets.range_of(anchor_marker) else {
725 return;
726 };
727 let idx = offsets
728 .entries
729 .iter()
730 .position(|(m, _)| *m == anchor_marker)
731 .unwrap();
732 let is_last = idx + 1 == offsets.entries.len();
733 let has_pred = idx > 0;
734 (start, idx, is_last, has_pred)
735 };
736
737 // Symmetric to insert_table_anchor. The 4 bytes to remove are:
738 // - if anchor is last: [byte_start - 1 .. byte_start + 3) — the
739 // preceding `\n` + the 3-byte sentinel
740 // - otherwise: [byte_start .. byte_start + 4) — the sentinel
741 // + the following `\n`
742 let (remove_start, remove_end) = if anchor_is_last && has_predecessor {
743 (anchor_byte_start - 1, anchor_byte_start + 3)
744 } else {
745 (anchor_byte_start, anchor_byte_start + 4)
746 };
747
748 {
749 let mut rope = store.rope.write();
750 let char_start = rope.byte_to_char(remove_start as usize);
751 let char_end = rope.byte_to_char(remove_end as usize);
752 rope.remove(char_start..char_end);
753 }
754 {
755 let mut offsets = store.block_offsets.write();
756 offsets.remove_at(anchor_idx);
757 }
758 store.block_offsets.write().shift_after(remove_start, -4);
759}
760
761/// Remove a registered block from the rope: drops its content bytes
762/// plus one boundary `\n` (the one after, if the block has a
763/// successor; the one before, if it's the last entry), removes the
764/// entry from the index, and shifts trailing entries by the negative
765/// byte delta.
766///
767/// No-op if `block_id` is not in the index. No-op for the special
768/// case of a single-block document being asked to remove its sole
769/// block (we'd produce an empty rope but the block itself is being
770/// cascaded by the caller).
771pub fn rope_remove_block(store: &Store, block_id: EntityId) {
772 let block_marker = OffsetMarker::Block(block_id);
773 let (block_start, block_end, idx, is_last, has_pred) = {
774 let offsets = store.block_offsets.read();
775 let Some((start, end)) = offsets.range_of(block_marker) else {
776 return;
777 };
778 let idx = offsets
779 .entries
780 .iter()
781 .position(|(m, _)| *m == block_marker)
782 .unwrap();
783 let is_last = idx + 1 == offsets.entries.len();
784 let has_pred = idx > 0;
785 (start, end, idx, is_last, has_pred)
786 };
787
788 // Determine the byte range to delete:
789 // - if there's a successor: [block_start..block_end) — the
790 // block's content INCLUDING its trailing boundary `\n`
791 // (which is the byte at block_end - 1)
792 // - if last and has predecessor: [block_start - 1..block_end)
793 // — also delete the LEADING boundary `\n` that the previous
794 // entry placed before us
795 // - if last and no predecessor (sole entry): just delete
796 // [block_start..block_end) (no boundary `\n` exists)
797 let (remove_start, remove_end) = if is_last && has_pred {
798 (block_start.saturating_sub(1), block_end)
799 } else {
800 (block_start, block_end)
801 };
802 if remove_end <= remove_start {
803 // Drop the entry only; nothing to remove from the rope.
804 store.block_offsets.write().remove_at(idx);
805 return;
806 }
807 let deleted_bytes = remove_end - remove_start;
808
809 {
810 let mut rope = store.rope.write();
811 let char_start = rope.byte_to_char(remove_start as usize);
812 let char_end = rope.byte_to_char(remove_end as usize);
813 rope.remove(char_start..char_end);
814 }
815 store.block_offsets.write().remove_at(idx);
816 // Shift entries STRICTLY PAST the removed range. Using `remove_end` as
817 // the threshold (rather than `remove_start`) keeps an empty predecessor
818 // whose byte_start equals `remove_start` (the leading boundary `\n` we
819 // just deleted) in place — its content position is unchanged, only its
820 // trailing boundary is gone. `total_bytes` decreases by `deleted_bytes`
821 // regardless of threshold.
822 store
823 .block_offsets
824 .write()
825 .shift_after(remove_end, -(deleted_bytes as i32));
826}
827
828/// Replace the entire content of a registered block in the rope with
829/// `new_text`. Preserves the block's `byte_start` and its trailing
830/// boundary `\n` (if any); subsequent entries shift by the net
831/// length delta.
832///
833/// Used by use cases that compute a block's final content as a string
834/// and want to push that content to the rope in one shot — e.g. the
835/// block-splitting branches of `insert_html_at_position_uc` and
836/// `insert_markdown_at_position_uc`, where each affected block
837/// (head, tail, mid-replacement) gets a single new value.
838///
839/// No-op if `block_id` is not in the index.
840pub fn rope_replace_block_content(store: &Store, block_id: EntityId, new_text: &str) {
841 let (block_byte_start, content_bytes) = {
842 let offsets = store.block_offsets.read();
843 let Some((start, end)) = offsets.range_of_block(block_id) else {
844 return;
845 };
846 let total = offsets.total_bytes();
847 // `range_of` extends to the next entry's `byte_start` (or to
848 // `total_bytes`). If there's a following entry, the byte at
849 // `end - 1` is the inter-block boundary `\n` that belongs to
850 // the boundary between this block and the next, not to this
851 // block's content.
852 let has_trailing_boundary = end < total;
853 let content_bytes = if has_trailing_boundary {
854 end - start - 1
855 } else {
856 end - start
857 };
858 (start, content_bytes)
859 };
860
861 let new_bytes = new_text.len() as u32;
862 if content_bytes == 0 && new_bytes == 0 {
863 return;
864 }
865 let delta = new_bytes as i32 - content_bytes as i32;
866
867 // Splice [block_byte_start..block_byte_start + content_bytes)
868 // with `new_text`.
869 {
870 let mut rope = store.rope.write();
871 let char_start = rope.byte_to_char(block_byte_start as usize);
872 if content_bytes > 0 {
873 let char_end = rope.byte_to_char((block_byte_start + content_bytes) as usize);
874 rope.remove(char_start..char_end);
875 }
876 if new_bytes > 0 {
877 rope.insert(char_start, new_text);
878 }
879 }
880
881 if delta != 0 {
882 // Shift entries that sit strictly past this block's start
883 // (i.e. the trailing boundary and everything after).
884 store
885 .block_offsets
886 .write()
887 .shift_after(block_byte_start + 1, delta);
888 }
889}
890
891/// Delete bytes `[byte_start_in_block..byte_end_in_block)` from inside
892/// the block identified by `block_id`. Shifts subsequent block offsets
893/// by the deleted byte length. No-op for blocks not in the index.
894pub fn rope_delete_in_block(
895 store: &Store,
896 block_id: EntityId,
897 byte_start_in_block: u32,
898 byte_end_in_block: u32,
899) {
900 if byte_end_in_block <= byte_start_in_block {
901 return;
902 }
903 let deleted_bytes = byte_end_in_block - byte_start_in_block;
904 let block_byte_start = {
905 let offsets = store.block_offsets.read();
906 let Some((start, _end)) = offsets.range_of_block(block_id) else {
907 return;
908 };
909 start
910 };
911 let rope_byte_start = block_byte_start + byte_start_in_block;
912 let rope_byte_end = block_byte_start + byte_end_in_block;
913 {
914 let mut rope = store.rope.write();
915 let char_start = rope.byte_to_char(rope_byte_start as usize);
916 let char_end = rope.byte_to_char(rope_byte_end as usize);
917 rope.remove(char_start..char_end);
918 }
919 store
920 .block_offsets
921 .write()
922 .shift_after(block_byte_start + 1, -(deleted_bytes as i32));
923}
924
925/// Recompute `Frame.byte_range` for every frame in `store.frames`
926/// based on current `block_offsets` and the frame tree structure.
927/// Plan §1.6 invariant: each frame's byte_range is the (min_start,
928/// max_end) over all its descendant blocks, sub-frames, and table
929/// anchors+cells.
930///
931/// Call this after any mutation that affects rope byte positions.
932/// O(F + B) where F = frames, B = blocks in the document.
933pub fn recompute_all_frame_byte_ranges(store: &Store) {
934 let frame_ids: Vec<EntityId> = {
935 let frames = store.frames.read();
936 frames.keys().copied().collect()
937 };
938 for fid in frame_ids {
939 let new_range = compute_frame_byte_range_recursive(store, fid);
940 let mut frames = store.frames.write();
941 if let Some(f) = frames.get(&fid).cloned()
942 && f.byte_range != new_range
943 {
944 let mut updated = f;
945 updated.byte_range = new_range;
946 frames.insert(fid, updated);
947 }
948 }
949}
950
951fn compute_frame_byte_range_recursive(store: &Store, frame_id: EntityId) -> (u32, u32) {
952 let mut bounds: Option<(u32, u32)> = None;
953 walk_frame_bounds(store, frame_id, &mut bounds);
954 bounds.unwrap_or((0, 0))
955}
956
957fn walk_frame_bounds(store: &Store, frame_id: EntityId, bounds: &mut Option<(u32, u32)>) {
958 fn merge(bounds: &mut Option<(u32, u32)>, s: u32, e: u32) {
959 *bounds = Some(match *bounds {
960 None => (s, e),
961 Some((min, max)) => (min.min(s), max.max(e)),
962 });
963 }
964
965 let (blocks, child_order, table_id) = {
966 let frames = store.frames.read();
967 let Some(f) = frames.get(&frame_id) else {
968 return;
969 };
970 (f.blocks.clone(), f.child_order.clone(), f.table)
971 };
972
973 {
974 let offsets = store.block_offsets.read();
975 for bid in &blocks {
976 if let Some((s, e)) = offsets.range_of_block(*bid) {
977 merge(bounds, s, e);
978 }
979 }
980 if let Some(tid) = table_id
981 && let Some((s, e)) = offsets.range_of(OffsetMarker::TableAnchor(tid))
982 {
983 merge(bounds, s, e);
984 }
985 }
986
987 for entry in &child_order {
988 if *entry < 0 {
989 walk_frame_bounds(store, (-*entry) as EntityId, bounds);
990 }
991 }
992
993 if let Some(tid) = table_id {
994 let cell_ids: Vec<EntityId> = {
995 let tables = store.tables.read();
996 tables
997 .get(&tid)
998 .map(|t| t.cells.clone())
999 .unwrap_or_default()
1000 };
1001 for cell_id in &cell_ids {
1002 let cell_frame_id = {
1003 let cells = store.table_cells.read();
1004 cells.get(cell_id).and_then(|c| c.cell_frame)
1005 };
1006 if let Some(cfid) = cell_frame_id {
1007 walk_frame_bounds(store, cfid, bounds);
1008 }
1009 }
1010 }
1011}