text-document-common 1.5.4

Shared entities, database, events, and undo/redo infrastructure for text-document
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
//! Helpers for writing the global character rope from use cases.
//!
//! Each helper mutates `store.rope` and `store.block_offsets`
//! together so callers can stay oblivious to the underlying layout.
//! Read helpers (`block_content_via_store`) source content from the
//! rope and return empty when a block is not yet registered in the
//! offset index.

use crate::database::Store;
use crate::database::block_offset_index::OffsetMarker;
use crate::entities::Block;
use crate::types::EntityId;

/// Read a block's content from the global rope via `block_offsets`,
/// stripping the trailing `\n` boundary that `range_of` includes for
/// non-last entries. Returns an empty string if the block isn't
/// registered in the offset index (e.g. a freshly-created block that
/// hasn't been spliced into the rope yet — `setup_with_text` test
/// docs use this path).
pub fn block_content_via_store(block: &Block, store: &Store) -> String {
    let offsets = store.block_offsets.read().unwrap();
    let marker = OffsetMarker::Block(block.id);
    let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
        return String::new();
    };
    // Drop the trailing inter-block boundary `\n` ONLY when this block
    // has a successor entry — that one byte is the boundary `\n` between
    // this block and the next. The last entry has no trailing boundary;
    // any final `\n` is real content.
    let content_end = if has_successor && be > bs { be - 1 } else { be };
    drop(offsets);
    let rope = store.rope.read().unwrap();
    rope.byte_slice(bs as usize..content_end as usize)
        .to_string()
}

/// Logical character count of a block — what the old
/// `Block.text_length` field used to cache. Image anchors are stored as
/// `\u{FFFC}` (one char, three bytes) inside the rope content, so the
/// char count already covers them. Returns 0 for blocks not registered
/// in the offset index.
///
/// O(log n) via `ropey::Rope::byte_to_char` — does NOT materialize the
/// block's text into a String. Replaces the prior O(L) implementation
/// that counted chars by walking UTF-8 over a cloned slice.
pub fn block_char_length(block: &Block, store: &Store) -> i64 {
    let offsets = store.block_offsets.read().unwrap();
    let marker = OffsetMarker::Block(block.id);
    let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
        return 0;
    };
    let content_end_bytes = if has_successor && be > bs { be - 1 } else { be };
    drop(offsets);
    let rope = store.rope.read().unwrap();
    let char_start = rope.byte_to_char(bs as usize);
    let char_end = rope.byte_to_char(content_end_bytes as usize);
    (char_end - char_start) as i64
}

/// Return the absolute character position of a block's start in the
/// document, derived from the rope via `BlockOffsetIndex`.
///
/// O(log n): one `range_of_block` lookup + one `byte_to_char` conversion.
/// Falls back to `block.document_position` for blocks not registered in
/// the index — currently table-cell blocks (plan §1.6 deferred) and
/// blocks under non-top-level frames (`insert_frame_uc` only mirrors
/// top-level frames). The stored field is set authoritatively by those
/// non-flow paths and stays correct for them across main-flow edits.
pub fn block_document_position(block: &Block, store: &Store) -> i64 {
    let offsets = store.block_offsets.read().unwrap();
    let Some((byte_start, _)) = offsets.range_of_block(block.id) else {
        return block.document_position;
    };
    drop(offsets);
    let rope = store.rope.read().unwrap();
    rope.byte_to_char(byte_start as usize) as i64
}

/// Whether the rope's char-position space matches the user-visible
/// flow positions that `Block.document_position` is computed against.
///
/// They match when every block is mirrored to the rope as a contiguous
/// run. They DON'T match when:
/// 1. The document contains tables — cell content sits at separate rope
///    byte ranges (plan §1.6), so the rope is missing the cells'
///    flow-position contribution.
/// 2. The document contains sub-frames whose blocks aren't mirrored —
///    `insert_frame_uc` only mirrors top-level frames.
///
/// When this returns `true`, readers can derive `document_position`
/// directly from the rope (O(log n)) and the use-case-side
/// position-refresh loops can be skipped. When it returns `false`,
/// readers must consult the maintained `Block.document_position`
/// stored field, and the loops are required to keep it correct.
pub fn rope_positions_match_flow(store: &Store) -> bool {
    let offsets = store.block_offsets.read().unwrap();
    if offsets.table_anchor_count() > 0 {
        return false;
    }
    // With no table anchors, every entry is a Block marker, so
    // `entries.len()` equals the indexed block count.
    let indexed_block_count = offsets.entries.len();
    drop(offsets);
    let total_block_count = store.blocks.read().unwrap().len();
    indexed_block_count == total_block_count
}

/// Locate which block contains a given absolute char position in the
/// document, returning `(block_id, char_offset_in_block, block_char_start)`
/// in O(log n) using the rope + `BlockOffsetIndex` instead of an O(N)
/// linear walk of all blocks.
///
/// Replaces the per-keystroke hot path in editing use cases
/// (`find_block_at_position_sequential`) which fetched every block
/// + called `block_char_length` per block. For an N-block document
///   each editor keystroke now costs O(log n) lookups instead of O(N).
///
/// Returns `None` for documents containing tables — table cell content
/// lives in separate byte ranges later in the rope (plan §1.6), so the
/// rope's char-position space diverges from the user-visible flow
/// order. Callers must fall back to the slow per-block walk in that
/// case.
///
/// `position` past the document end clamps to the last block's
/// end-of-content.
pub fn find_block_at_char_position(store: &Store, position: i64) -> Option<(EntityId, i64, i64)> {
    // Fast path is only valid when EVERY block in the document is
    // mirrored to the rope. Disqualifying cases:
    //
    // 1. Documents containing tables — cell content sits at separate
    //    rope byte ranges (plan §1.6) so byte→block lookup finds the
    //    wrong block for cursor positions inside cells.
    //
    // 2. Documents containing sub-frames inserted with a parent —
    //    `insert_frame_uc` currently only mirrors top-level frames to
    //    the rope (parent=None case), leaving sub-frame blocks
    //    unregistered in the offset index. Detect by comparing block
    //    counts: if the rope index has fewer Block markers than the
    //    store has Block entities, some are unmirrored.
    let offsets = store.block_offsets.read().unwrap();
    if offsets.table_anchor_count() > 0 {
        return None;
    }
    // No table anchors → every entry is a Block marker, so the indexed
    // block count is simply `entries.len()`. O(1) instead of an O(N)
    // filter-count on every keystroke.
    let indexed_block_count = offsets.entries.len();
    let total_block_count = store.blocks.read().unwrap().len();
    if indexed_block_count != total_block_count {
        return None;
    }
    drop(offsets);

    let rope = store.rope.read().unwrap();
    let total_chars = rope.len_chars() as i64;
    let pos_clamped = position.clamp(0, total_chars);
    let abs_byte = rope.char_to_byte(pos_clamped as usize);
    drop(rope);

    let offsets = store.block_offsets.read().unwrap();
    let block_id = offsets.marker_at_byte(abs_byte as u32)?.as_block()?;
    let (bs, be, has_successor) = offsets.range_with_successor(OffsetMarker::Block(block_id))?;
    let content_end = if has_successor && be > bs { be - 1 } else { be };

    drop(offsets);

    let rope = store.rope.read().unwrap();
    let block_char_start = rope.byte_to_char(bs as usize) as i64;
    // Clamp the cursor's byte to the block's content area (not into the
    // trailing `\n` boundary if any).
    let byte_for_char = std::cmp::min(abs_byte, content_end as usize);
    let abs_char = rope.byte_to_char(byte_for_char) as i64;
    let char_in_block = abs_char - block_char_start;

    // NOTE: separator semantics differ across use cases. Callers like
    // `get_block_at_position_uc` interpret "position at the end of a
    // non-empty block with a successor" as "belongs to the next
    // block"; callers like `insert_text_uc` want the previous block
    // with offset == block_char_len. This helper returns the
    // previous-block answer (the simpler semantic); use-case-specific
    // advance logic lives at the call site.
    let _ = has_successor;

    Some((block_id, char_in_block, block_char_start))
}

/// Convert an in-block char offset to an in-block byte offset using the
/// rope's index. Both inputs and outputs are relative to the start of
/// the block's content (NOT absolute rope positions). The char offset
/// is clamped to the block's logical length so callers don't need to
/// pre-validate.
///
/// O(log n) via `ropey::Rope::char_to_byte` — replaces the O(L)
/// "materialize block text + walk char_indices" pattern that
/// `set_text_format_uc` / `merge_text_format_uc` used. Returns
/// `(byte_offset_in_block, content_byte_len)` so callers can also
/// pass the content length to `debug_assert_well_formed` without
/// materializing the text.
///
/// Returns `(0, 0)` for blocks not registered in the offset index.
pub fn block_char_to_byte_in_block(
    store: &Store,
    block_id: EntityId,
    char_offset: usize,
) -> (u32, usize) {
    let offsets = store.block_offsets.read().unwrap();
    let marker = OffsetMarker::Block(block_id);
    let Some((bs, be, has_successor)) = offsets.range_with_successor(marker) else {
        return (0, 0);
    };
    let content_end_bytes = if has_successor && be > bs { be - 1 } else { be };
    let content_byte_len = (content_end_bytes - bs) as usize;
    drop(offsets);

    let rope = store.rope.read().unwrap();
    let block_char_start = rope.byte_to_char(bs as usize);
    let block_char_end = rope.byte_to_char(content_end_bytes as usize);
    let block_char_len = block_char_end - block_char_start;

    // Clamp char_offset to block's char length.
    let clamped = std::cmp::min(char_offset, block_char_len);
    let abs_byte = rope.char_to_byte(block_char_start + clamped);
    let byte_in_block = (abs_byte - bs as usize) as u32;
    (byte_in_block, content_byte_len)
}

/// Fast-path full-document plain text: returns `Some(rope.to_string())`
/// iff the document is in the canonical flat layout — no table anchors
/// in the offset index, single top-level frame. In that case the
/// rope's byte order is the same as the document-flow order, so one
/// `to_string()` allocation replaces the O(N) per-block walk +
/// per-block `Cow<str>` materialization that `build_full_text` /
/// `export_plain_text` would otherwise do.
///
/// Returns `None` for documents containing tables or nested frames —
/// those require the per-frame, per-block traversal because table
/// cell content lives in separate byte ranges later in the rope
/// (plan §1.6), not interleaved with the parent frame's bytes.
///
/// `top_frame_count` is the caller-known number of top-level frames
/// (typically obtained from `Document.frames.len()`). Callers
/// already have this value and pass it in to avoid a redundant uow
/// query.
pub fn rope_flat_text_if_simple(store: &Store, top_frame_count: usize) -> Option<String> {
    if top_frame_count != 1 {
        return None;
    }
    let offsets = store.block_offsets.read().unwrap();
    let has_table = offsets
        .entries
        .iter()
        .any(|(m, _)| matches!(m, OffsetMarker::TableAnchor(_)));
    if has_table {
        return None;
    }
    drop(offsets);
    Some(store.rope.read().unwrap().to_string())
}

/// Reset the rope to empty and clear `block_offsets`. Called by
/// importers when they replace the entire document content.
pub fn rope_reset(store: &Store) {
    *store.rope.write().unwrap() = ropey::Rope::new();
    *store.block_offsets.write().unwrap() =
        crate::database::block_offset_index::BlockOffsetIndex::new();
}

/// Append `text` to the end of the rope and register `block_id` at
/// the byte position where the text starts. Returns that byte offset.
///
/// Callers are responsible for inserting an inter-block `\n`
/// (`rope_insert_block_boundary`) before each block AFTER the first
/// in a contiguous frame.
pub fn rope_append_block(store: &Store, block_id: EntityId, text: &str) -> u32 {
    let mut rope = store.rope.write().unwrap();
    let byte_start = rope.len_bytes() as u32;
    let char_end = rope.len_chars();
    rope.insert(char_end, text);
    let new_total = rope.len_bytes() as u32;
    drop(rope);

    let mut offsets = store.block_offsets.write().unwrap();
    offsets.push_block(block_id, byte_start);
    offsets.set_total_bytes(new_total);
    byte_start
}

/// Insert `text` as a new block at `byte_pos` in the rope, prepending
/// a `\n` boundary. Used by `insert_table_uc` to place cell blocks at
/// the end of their containing top-level frame's range (plan §1.6),
/// rather than always at rope end.
///
/// Total bytes inserted: `1 + text.len()`. The block's content
/// occupies `[byte_pos + 1, byte_pos + 1 + text.len())`. The block
/// entry is registered at `byte_pos + 1` in `block_offsets`.
///
/// Existing entries with `byte_start == byte_pos` (e.g. a previous
/// empty block whose end coincides with this insertion point) are
/// kept BEFORE the new entry in the Vec, since the inserted `\n`
/// boundary belongs after them. Entries strictly past `byte_pos`
/// shift forward by `(1 + text.len())` bytes.
///
/// When `byte_pos == total_bytes`, behaves like
/// `rope_insert_block_boundary` followed by `rope_append_block`.
pub fn rope_insert_block_at(store: &Store, byte_pos: u32, block_id: EntityId, text: &str) {
    let delta = (1 + text.len()) as i32;
    // Vec position: insert AFTER any entry at byte_pos itself
    // (those represent earlier empty blocks whose `\n` boundary
    // we are placing now). Only entries strictly past byte_pos
    // come after our new entry in the Vec.
    let new_entry_vec_pos = {
        let offsets = store.block_offsets.read().unwrap();
        offsets
            .entries
            .iter()
            .position(|(_, bs)| *bs > byte_pos)
            .unwrap_or(offsets.entries.len())
    };
    {
        let mut rope = store.rope.write().unwrap();
        let char_idx = rope.byte_to_char(byte_pos as usize);
        let mut combined = String::with_capacity(1 + text.len());
        combined.push('\n');
        combined.push_str(text);
        rope.insert(char_idx, &combined);
    }
    let mut offsets = store.block_offsets.write().unwrap();
    // Shift entries strictly past byte_pos. Entries AT byte_pos
    // (the prior empty block) stay where they are — the new `\n`
    // is conceptually "after" them.
    offsets.shift_after(byte_pos + 1, delta);
    offsets.insert_at(
        new_entry_vec_pos,
        OffsetMarker::Block(block_id),
        byte_pos + 1,
    );
}

/// Walks up `frame.parent_frame` to find the top-level ancestor of
/// the given frame, then returns the end byte of that top-level
/// frame's current rope range — i.e. the byte position where blocks
/// belonging to that frame's subtree (e.g. table cells per plan §1.6)
/// should be inserted so they land BEFORE any following top-level
/// frame's content.
///
/// Reads `block_offsets`/`frames`/`tables`/`table_cells` directly, so
/// the result is fresh even when `Frame.byte_range` has not yet been
/// recomputed at commit time.
pub fn top_level_frame_end_byte(store: &Store, frame_id: EntityId) -> u32 {
    let top_id = {
        let frames = store.frames.read().unwrap();
        let mut current = frame_id;
        loop {
            let Some(f) = frames.get(&current) else {
                return 0;
            };
            match f.parent_frame {
                None => break current,
                Some(p) => current = p,
            }
        }
    };
    let (_min, max) = compute_frame_byte_range_recursive(store, top_id);
    max
}

/// Append a new empty block to the end of the rope, separating it
/// from any prior content with a `\n` boundary (only if the rope is
/// already non-empty). Registers `block_id` at the resulting byte
/// position. Returns that byte position. Used when `insert_frame_uc`
/// creates a new top-level frame with a single empty block.
pub fn rope_append_empty_block(store: &Store, block_id: EntityId) -> u32 {
    let was_empty = store.rope.read().unwrap().len_bytes() == 0;
    if !was_empty {
        rope_insert_block_boundary(store);
    }
    let pos = store.rope.read().unwrap().len_bytes() as u32;
    let mut offsets = store.block_offsets.write().unwrap();
    offsets.push_block(block_id, pos);
    offsets.set_total_bytes(pos);
    pos
}

/// Append a single `\n` inter-block boundary character to the end of
/// the rope. Does NOT register a block — this is the sentinel between
/// two adjacent blocks within the same frame (plan §1.4).
pub fn rope_insert_block_boundary(store: &Store) {
    let mut rope = store.rope.write().unwrap();
    let char_end = rope.len_chars();
    rope.insert(char_end, "\n");
    let new_total = rope.len_bytes() as u32;
    drop(rope);

    store
        .block_offsets
        .write()
        .unwrap()
        .set_total_bytes(new_total);
}

/// Insert `text` at `byte_offset_in_block` inside the block identified
/// by `block_id`. Looks up the block's start in the rope via
/// `block_offsets.range_of()`, splices into the rope, and shifts
/// subsequent block offsets by the inserted byte length.
///
/// Silently no-ops if the block is not registered in the offset index
/// (this can happen for blocks whose content lives outside the global
/// rope, e.g. table cells until step 5.5).
pub fn rope_insert_in_block(
    store: &Store,
    block_id: EntityId,
    byte_offset_in_block: u32,
    text: &str,
) {
    let inserted_bytes = text.len() as u32;
    if inserted_bytes == 0 {
        return;
    }
    let block_byte_start = {
        let offsets = store.block_offsets.read().unwrap();
        let Some((start, _end)) = offsets.range_of_block(block_id) else {
            return;
        };
        start
    };
    let rope_byte = block_byte_start + byte_offset_in_block;
    {
        let mut rope = store.rope.write().unwrap();
        let char_idx = rope.byte_to_char(rope_byte as usize);
        rope.insert(char_idx, text);
    }
    // Shift entries past this block by inserted_bytes. Threshold
    // is one byte past block_byte_start so the current block's own
    // entry isn't moved.
    store
        .block_offsets
        .write()
        .unwrap()
        .shift_after(block_byte_start + 1, inserted_bytes as i32);
}

/// Split an existing block in the rope at `byte_offset_in_block`:
/// - inserts a `\n` inter-block boundary at the absolute byte position
///   `block_start + byte_offset_in_block` in the rope
/// - shifts entries past that position by +1 byte
/// - inserts a new entry for `new_block_id` at
///   `block_start + byte_offset_in_block + 1` (right after the newline),
///   placed immediately after the original block in the entries Vec
///
/// `byte_offset_in_block` may be 0 (split before first char of block,
/// i.e. insert empty block before this one) or equal to the block's
/// byte length (split after last char, i.e. insert empty block after).
pub fn rope_split_block(
    store: &Store,
    current_block_id: EntityId,
    byte_offset_in_block: u32,
    new_block_id: EntityId,
) {
    let current_marker = OffsetMarker::Block(current_block_id);
    let (block_start, current_idx) = {
        let offsets = store.block_offsets.read().unwrap();
        let Some((start, _end)) = offsets.range_of(current_marker) else {
            return;
        };
        let idx = offsets
            .entries
            .iter()
            .position(|(m, _)| *m == current_marker)
            .unwrap();
        (start, idx)
    };
    let split_byte = block_start + byte_offset_in_block;

    // 1. Insert the `\n` boundary at the split point.
    {
        let mut rope = store.rope.write().unwrap();
        let char_idx = rope.byte_to_char(split_byte as usize);
        rope.insert(char_idx, "\n");
    }

    // 2. Shift entries past the split (and total_bytes) by +1.
    //    Threshold > split_byte so the new entry we insert next
    //    isn't double-shifted.
    store
        .block_offsets
        .write()
        .unwrap()
        .shift_after(split_byte + 1, 1);

    // 3. Register the new block at `split_byte + 1`, immediately
    //    after the original in the entries Vec.
    store.block_offsets.write().unwrap().insert_at(
        current_idx + 1,
        OffsetMarker::Block(new_block_id),
        split_byte + 1,
    );
}

/// Merge `start_block` and `end_block` by deleting the rope range
/// `[start_block.start + byte_so .. end_block.start + byte_eo)` — i.e.
/// the suffix of `start_block`, every block between (and their
/// boundary newlines), and the prefix of `end_block`. Removes the
/// index entries for every block strictly between `start_block` and
/// `end_block` (inclusive of `end_block` itself); the surviving
/// content lives in `start_block`. Shifts any blocks past `end_block`
/// by the negative delta.
///
/// No-op if `start_block` is not in the index. Skipped for any
/// intermediate block id whose range is missing from the index (e.g.
/// table cells until step 5.5e).
pub fn rope_merge_block_range(
    store: &Store,
    start_block_id: EntityId,
    byte_so_in_start: u32,
    end_block_id: EntityId,
    byte_eo_in_end: u32,
) {
    let start_marker = OffsetMarker::Block(start_block_id);
    let end_marker = OffsetMarker::Block(end_block_id);
    let (start_block_byte, end_block_byte, start_idx, end_idx) = {
        let offsets = store.block_offsets.read().unwrap();
        let Some((sb, _)) = offsets.range_of(start_marker) else {
            return;
        };
        let Some((eb, _)) = offsets.range_of(end_marker) else {
            return;
        };
        let si = offsets
            .entries
            .iter()
            .position(|(m, _)| *m == start_marker)
            .unwrap();
        let ei = offsets
            .entries
            .iter()
            .position(|(m, _)| *m == end_marker)
            .unwrap();
        (sb, eb, si, ei)
    };
    if end_idx <= start_idx {
        return;
    }

    let delete_start = start_block_byte + byte_so_in_start;
    let delete_end = end_block_byte + byte_eo_in_end;
    if delete_end <= delete_start {
        return;
    }
    let deleted_bytes = delete_end - delete_start;

    // 1. Remove the rope range.
    {
        let mut rope = store.rope.write().unwrap();
        let char_start = rope.byte_to_char(delete_start as usize);
        let char_end = rope.byte_to_char(delete_end as usize);
        rope.remove(char_start..char_end);
    }

    // 2. Remove block_offsets entries for [start_idx+1 ..= end_idx].
    {
        let mut offsets = store.block_offsets.write().unwrap();
        offsets.drain_inclusive(start_idx + 1, end_idx);
    }

    // 3. Shift any remaining entries past the deletion by -deleted_bytes.
    //    Threshold > delete_start because start_block's own entry
    //    sits at delete_start - byte_so_in_start (≤ delete_start)
    //    and must not move.
    store
        .block_offsets
        .write()
        .unwrap()
        .shift_after(delete_start + 1, -(deleted_bytes as i32));
}

/// Insert a U+FFFC OBJECT REPLACEMENT CHARACTER sentinel in the rope
/// at the table-anchor position, registering a `TableAnchor(table_id)`
/// marker in the offset index (plan §1.6).
///
/// `target_block_id` is the block in the parent frame that the table
/// is adjacent to. `after` controls whether the table goes BEFORE
/// (`after = false`) or AFTER the target block.
///
/// The 3-byte sentinel is paired with an inter-marker `\n`:
/// - `after = false`: inserts `\u{FFFC}\n` at `target.byte_start`
/// - `after = true`, target is NOT the last entry: inserts
///   `\u{FFFC}\n` at `target.byte_end` (between target's trailing
///   `\n` and the next entry)
/// - `after = true`, target IS the last entry: inserts `\n\u{FFFC}`
///   at `target.byte_end` (rope now ends with the sentinel)
///
/// NOTE: cell-internal content is not yet routed through the rope —
/// the rope reflects table *presence* (3-byte sentinel) only.
/// Routing cell content is deferred to plan §1.6's `Frame.byte_range`
/// model.
///
/// No-op if `target_block_id` is not in the index.
pub fn rope_insert_table_anchor(
    store: &Store,
    table_id: EntityId,
    target_block_id: EntityId,
    after: bool,
) {
    const SENTINEL: &str = "\u{FFFC}"; // 3 bytes
    const SENTINEL_BYTES: u32 = 3;

    let (insert_pos, target_idx, target_is_last) = {
        let offsets = store.block_offsets.read().unwrap();
        let target_marker = OffsetMarker::Block(target_block_id);
        let Some((start, end)) = offsets.range_of(target_marker) else {
            return;
        };
        let idx = offsets
            .entries
            .iter()
            .position(|(m, _)| *m == target_marker)
            .unwrap();
        let is_last = idx + 1 == offsets.entries.len();
        let pos = if after { end } else { start };
        (pos, idx, is_last)
    };

    // Insertion strategy:
    let (rope_inserted, marker_byte_start, new_entry_pos, shift_threshold, shift_delta) = if !after
    {
        // Before target: "\u{FFFC}\n" at target.byte_start
        ("\u{FFFC}\n", insert_pos, target_idx, insert_pos, 4i32)
    } else if !target_is_last {
        // After target, with following entries: "\u{FFFC}\n"
        // at target.byte_end. The TableAnchor sits where the
        // next block USED to start; that following entry
        // shifts by 4.
        ("\u{FFFC}\n", insert_pos, target_idx + 1, insert_pos, 4i32)
    } else {
        // After target which is last: "\n\u{FFFC}" appended.
        // TableAnchor's byte_start sits 1 past the original
        // total (after the new `\n`).
        (
            "\n\u{FFFC}",
            insert_pos + 1,
            target_idx + 1,
            insert_pos,
            4i32,
        )
    };

    // 1. Splice the literal bytes into the rope.
    {
        let mut rope = store.rope.write().unwrap();
        let char_idx = rope.byte_to_char(insert_pos as usize);
        rope.insert(char_idx, rope_inserted);
    }

    // 2. Shift entries past the insertion point. Use shift_after
    //    BEFORE inserting our new entry so we don't double-shift.
    store
        .block_offsets
        .write()
        .unwrap()
        .shift_after(shift_threshold, shift_delta);

    // 3. Register the TableAnchor at the resolved byte position
    //    and the resolved Vec position.
    store.block_offsets.write().unwrap().insert_at(
        new_entry_pos,
        OffsetMarker::TableAnchor(table_id),
        marker_byte_start,
    );

    // Note: SENTINEL_BYTES is part of `shift_delta` (3 for the
    // sentinel + 1 for the `\n`).
    let _ = SENTINEL;
    let _ = SENTINEL_BYTES;
}

/// Append a U+FFFC table-anchor sentinel at the end of the rope and
/// register a `TableAnchor(table_id)` marker. If the rope is already
/// non-empty, prepends a `\n` boundary so the sentinel doesn't run
/// into the previous entry's content.
///
/// Used by import paths (`import_html_uc`, `import_markdown_uc`)
/// that process the document linearly and append entities as they
/// encounter them, rather than inserting relative to an existing
/// target block.
pub fn rope_append_table_anchor(store: &Store, table_id: EntityId) {
    let (anchor_byte_start, new_total) = {
        let mut rope = store.rope.write().unwrap();
        let was_empty = rope.len_bytes() == 0;
        let char_end = rope.len_chars();
        let to_insert = if was_empty { "\u{FFFC}" } else { "\n\u{FFFC}" };
        rope.insert(char_end, to_insert);
        let new_total = rope.len_bytes() as u32;
        // Sentinel is 3 bytes; if a `\n` was prepended that's 1 byte
        // before the sentinel.
        let anchor_byte_start = new_total - 3;
        (anchor_byte_start, new_total)
    };

    let mut offsets = store.block_offsets.write().unwrap();
    offsets.push(OffsetMarker::TableAnchor(table_id), anchor_byte_start);
    offsets.set_total_bytes(new_total);
}

/// Remove a TableAnchor sentinel from the rope, undoing the effect
/// of `rope_insert_table_anchor`. Looks up the anchor's byte range
/// (always 3 bytes for the U+FFFC plus 1 byte of inter-marker `\n`
/// either before or after, depending on what's adjacent), removes
/// those 4 bytes from the rope, drops the entry, shifts trailing
/// entries by -4.
///
/// No-op if no TableAnchor for `table_id` exists.
pub fn rope_remove_table_anchor(store: &Store, table_id: EntityId) {
    let anchor_marker = OffsetMarker::TableAnchor(table_id);
    let (anchor_byte_start, anchor_idx, anchor_is_last, has_predecessor) = {
        let offsets = store.block_offsets.read().unwrap();
        let Some((start, _end)) = offsets.range_of(anchor_marker) else {
            return;
        };
        let idx = offsets
            .entries
            .iter()
            .position(|(m, _)| *m == anchor_marker)
            .unwrap();
        let is_last = idx + 1 == offsets.entries.len();
        let has_pred = idx > 0;
        (start, idx, is_last, has_pred)
    };

    // Symmetric to insert_table_anchor. The 4 bytes to remove are:
    // - if anchor is last: [byte_start - 1 .. byte_start + 3) — the
    //   preceding `\n` + the 3-byte sentinel
    // - otherwise: [byte_start .. byte_start + 4) — the sentinel
    //   + the following `\n`
    let (remove_start, remove_end) = if anchor_is_last && has_predecessor {
        (anchor_byte_start - 1, anchor_byte_start + 3)
    } else {
        (anchor_byte_start, anchor_byte_start + 4)
    };

    {
        let mut rope = store.rope.write().unwrap();
        let char_start = rope.byte_to_char(remove_start as usize);
        let char_end = rope.byte_to_char(remove_end as usize);
        rope.remove(char_start..char_end);
    }
    {
        let mut offsets = store.block_offsets.write().unwrap();
        offsets.remove_at(anchor_idx);
    }
    store
        .block_offsets
        .write()
        .unwrap()
        .shift_after(remove_start, -4);
}

/// Remove a registered block from the rope: drops its content bytes
/// plus one boundary `\n` (the one after, if the block has a
/// successor; the one before, if it's the last entry), removes the
/// entry from the index, and shifts trailing entries by the negative
/// byte delta.
///
/// No-op if `block_id` is not in the index. No-op for the special
/// case of a single-block document being asked to remove its sole
/// block (we'd produce an empty rope but the block itself is being
/// cascaded by the caller).
pub fn rope_remove_block(store: &Store, block_id: EntityId) {
    let block_marker = OffsetMarker::Block(block_id);
    let (block_start, block_end, idx, is_last, has_pred) = {
        let offsets = store.block_offsets.read().unwrap();
        let Some((start, end)) = offsets.range_of(block_marker) else {
            return;
        };
        let idx = offsets
            .entries
            .iter()
            .position(|(m, _)| *m == block_marker)
            .unwrap();
        let is_last = idx + 1 == offsets.entries.len();
        let has_pred = idx > 0;
        (start, end, idx, is_last, has_pred)
    };

    // Determine the byte range to delete:
    // - if there's a successor: [block_start..block_end) — the
    //   block's content INCLUDING its trailing boundary `\n`
    //   (which is the byte at block_end - 1)
    // - if last and has predecessor: [block_start - 1..block_end)
    //   — also delete the LEADING boundary `\n` that the previous
    //   entry placed before us
    // - if last and no predecessor (sole entry): just delete
    //   [block_start..block_end) (no boundary `\n` exists)
    let (remove_start, remove_end) = if is_last && has_pred {
        (block_start.saturating_sub(1), block_end)
    } else {
        (block_start, block_end)
    };
    if remove_end <= remove_start {
        // Drop the entry only; nothing to remove from the rope.
        store.block_offsets.write().unwrap().remove_at(idx);
        return;
    }
    let deleted_bytes = remove_end - remove_start;

    {
        let mut rope = store.rope.write().unwrap();
        let char_start = rope.byte_to_char(remove_start as usize);
        let char_end = rope.byte_to_char(remove_end as usize);
        rope.remove(char_start..char_end);
    }
    store.block_offsets.write().unwrap().remove_at(idx);
    // Shift entries STRICTLY PAST the removed range. Using `remove_end` as
    // the threshold (rather than `remove_start`) keeps an empty predecessor
    // whose byte_start equals `remove_start` (the leading boundary `\n` we
    // just deleted) in place — its content position is unchanged, only its
    // trailing boundary is gone. `total_bytes` decreases by `deleted_bytes`
    // regardless of threshold.
    store
        .block_offsets
        .write()
        .unwrap()
        .shift_after(remove_end, -(deleted_bytes as i32));
}

/// Replace the entire content of a registered block in the rope with
/// `new_text`. Preserves the block's `byte_start` and its trailing
/// boundary `\n` (if any); subsequent entries shift by the net
/// length delta.
///
/// Used by use cases that compute a block's final content as a string
/// and want to push that content to the rope in one shot — e.g. the
/// block-splitting branches of `insert_html_at_position_uc` and
/// `insert_markdown_at_position_uc`, where each affected block
/// (head, tail, mid-replacement) gets a single new value.
///
/// No-op if `block_id` is not in the index.
pub fn rope_replace_block_content(store: &Store, block_id: EntityId, new_text: &str) {
    let (block_byte_start, content_bytes) = {
        let offsets = store.block_offsets.read().unwrap();
        let Some((start, end)) = offsets.range_of_block(block_id) else {
            return;
        };
        let total = offsets.total_bytes();
        // `range_of` extends to the next entry's `byte_start` (or to
        // `total_bytes`). If there's a following entry, the byte at
        // `end - 1` is the inter-block boundary `\n` that belongs to
        // the boundary between this block and the next, not to this
        // block's content.
        let has_trailing_boundary = end < total;
        let content_bytes = if has_trailing_boundary {
            end - start - 1
        } else {
            end - start
        };
        (start, content_bytes)
    };

    let new_bytes = new_text.len() as u32;
    if content_bytes == 0 && new_bytes == 0 {
        return;
    }
    let delta = new_bytes as i32 - content_bytes as i32;

    // Splice [block_byte_start..block_byte_start + content_bytes)
    // with `new_text`.
    {
        let mut rope = store.rope.write().unwrap();
        let char_start = rope.byte_to_char(block_byte_start as usize);
        if content_bytes > 0 {
            let char_end = rope.byte_to_char((block_byte_start + content_bytes) as usize);
            rope.remove(char_start..char_end);
        }
        if new_bytes > 0 {
            rope.insert(char_start, new_text);
        }
    }

    if delta != 0 {
        // Shift entries that sit strictly past this block's start
        // (i.e. the trailing boundary and everything after).
        store
            .block_offsets
            .write()
            .unwrap()
            .shift_after(block_byte_start + 1, delta);
    }
}

/// Delete bytes `[byte_start_in_block..byte_end_in_block)` from inside
/// the block identified by `block_id`. Shifts subsequent block offsets
/// by the deleted byte length. No-op for blocks not in the index.
pub fn rope_delete_in_block(
    store: &Store,
    block_id: EntityId,
    byte_start_in_block: u32,
    byte_end_in_block: u32,
) {
    if byte_end_in_block <= byte_start_in_block {
        return;
    }
    let deleted_bytes = byte_end_in_block - byte_start_in_block;
    let block_byte_start = {
        let offsets = store.block_offsets.read().unwrap();
        let Some((start, _end)) = offsets.range_of_block(block_id) else {
            return;
        };
        start
    };
    let rope_byte_start = block_byte_start + byte_start_in_block;
    let rope_byte_end = block_byte_start + byte_end_in_block;
    {
        let mut rope = store.rope.write().unwrap();
        let char_start = rope.byte_to_char(rope_byte_start as usize);
        let char_end = rope.byte_to_char(rope_byte_end as usize);
        rope.remove(char_start..char_end);
    }
    store
        .block_offsets
        .write()
        .unwrap()
        .shift_after(block_byte_start + 1, -(deleted_bytes as i32));
}

/// Recompute `Frame.byte_range` for every frame in `store.frames`
/// based on current `block_offsets` and the frame tree structure.
/// Plan §1.6 invariant: each frame's byte_range is the (min_start,
/// max_end) over all its descendant blocks, sub-frames, and table
/// anchors+cells.
///
/// Call this after any mutation that affects rope byte positions.
/// O(F + B) where F = frames, B = blocks in the document.
pub fn recompute_all_frame_byte_ranges(store: &Store) {
    let frame_ids: Vec<EntityId> = {
        let frames = store.frames.read().unwrap();
        frames.keys().copied().collect()
    };
    for fid in frame_ids {
        let new_range = compute_frame_byte_range_recursive(store, fid);
        let mut frames = store.frames.write().unwrap();
        if let Some(f) = frames.get(&fid).cloned()
            && f.byte_range != new_range
        {
            let mut updated = f;
            updated.byte_range = new_range;
            frames.insert(fid, updated);
        }
    }
}

fn compute_frame_byte_range_recursive(store: &Store, frame_id: EntityId) -> (u32, u32) {
    let mut bounds: Option<(u32, u32)> = None;
    walk_frame_bounds(store, frame_id, &mut bounds);
    bounds.unwrap_or((0, 0))
}

fn walk_frame_bounds(store: &Store, frame_id: EntityId, bounds: &mut Option<(u32, u32)>) {
    fn merge(bounds: &mut Option<(u32, u32)>, s: u32, e: u32) {
        *bounds = Some(match *bounds {
            None => (s, e),
            Some((min, max)) => (min.min(s), max.max(e)),
        });
    }

    let (blocks, child_order, table_id) = {
        let frames = store.frames.read().unwrap();
        let Some(f) = frames.get(&frame_id) else {
            return;
        };
        (f.blocks.clone(), f.child_order.clone(), f.table)
    };

    {
        let offsets = store.block_offsets.read().unwrap();
        for bid in &blocks {
            if let Some((s, e)) = offsets.range_of_block(*bid) {
                merge(bounds, s, e);
            }
        }
        if let Some(tid) = table_id
            && let Some((s, e)) = offsets.range_of(OffsetMarker::TableAnchor(tid))
        {
            merge(bounds, s, e);
        }
    }

    for entry in &child_order {
        if *entry < 0 {
            walk_frame_bounds(store, (-*entry) as EntityId, bounds);
        }
    }

    if let Some(tid) = table_id {
        let cell_ids: Vec<EntityId> = {
            let tables = store.tables.read().unwrap();
            tables
                .get(&tid)
                .map(|t| t.cells.clone())
                .unwrap_or_default()
        };
        for cell_id in &cell_ids {
            let cell_frame_id = {
                let cells = store.table_cells.read().unwrap();
                cells.get(cell_id).and_then(|c| c.cell_frame)
            };
            if let Some(cfid) = cell_frame_id {
                walk_frame_bounds(store, cfid, bounds);
            }
        }
    }
}