Skip to main content

common/
format_runs.rs

1//! Per-block character formatting as sorted, non-overlapping byte spans.
2//!
3//! Each block carries a `Vec<FormatRun>` (formatting) and a
4//! `Vec<ImageAnchor>` (image positions). The block's `plain_text` is
5//! the authoritative character source for byte offsets used by both.
6//! Replaces the pre-Phase-1 model where every formatted run and every
7//! inline image was a row in the now-deleted `inline_elements` entity
8//! table; the [`InlineSegment`] type in this module is a transient
9//! view synthesized from `(plain_text, format_runs, block_images)`
10//! for readers (export, fragments, cursor) that still consume a
11//! per-segment shape.
12//!
13//! Invariants are documented on [`FormatRun`] and enforced by
14//! [`debug_assert_well_formed`] and by [`splice_range`] / [`shift_after`]
15//! which rebuild the run list while preserving them.
16
17use crate::entities::{CharVerticalAlignment, UnderlineStyle};
18use serde::{Deserialize, Serialize};
19
/// Content type for an inline segment: text, image, or empty.
///
/// `Empty` is the `Default` variant, so a default-constructed
/// [`InlineSegment`] carries no content until one is assigned.
#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq, Eq)]
pub enum InlineContent {
    /// No content; the default variant.
    #[default]
    Empty,
    /// A piece of text owned by the segment.
    Text(String),
    /// An inline image. The fields have the same names and types as the
    /// image fields on [`ImageAnchor`].
    Image {
        name: String,
        // NOTE(review): the units of `width` / `height` and the scale of
        // `quality` are not visible in this module — confirm with the
        // image pipeline before relying on them.
        width: i64,
        height: i64,
        quality: i64,
    },
}
33
/// A lean view type representing one inline segment (text or image) with its
/// associated formatting. Used by readers (export, fragments, cursor) to
/// consume per-segment data synthesized from `(plain_text, format_runs,
/// block_images)` via [`crate::format_runs_query::inline_segments_for_block`].
/// Never stored — synthesized on demand.
///
/// The `fmt_*` field names match those on `Block` and on `FragmentElement`
/// so readers can copy fields verbatim across the three types.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
pub struct InlineSegment {
    /// What this segment carries: text, an image, or nothing.
    pub content: InlineContent,
    // Character formatting. Every `fmt_*` field below has a counterpart on
    // `CharacterFormat` with the same name minus the `fmt_` prefix; the two
    // sets are copied field-by-field by `character_format_from_segment` and
    // `apply_character_format_to_segment`.
    // NOTE(review): `None` presumably means "attribute not set" — confirm
    // against the readers that consume these fields.
    pub fmt_font_family: Option<String>,
    pub fmt_font_point_size: Option<i64>,
    pub fmt_font_weight: Option<i64>,
    pub fmt_font_bold: Option<bool>,
    pub fmt_font_italic: Option<bool>,
    pub fmt_font_underline: Option<bool>,
    pub fmt_font_overline: Option<bool>,
    pub fmt_font_strikeout: Option<bool>,
    pub fmt_letter_spacing: Option<i64>,
    pub fmt_word_spacing: Option<i64>,
    pub fmt_anchor_href: Option<String>,
    pub fmt_anchor_names: Vec<String>,
    pub fmt_is_anchor: Option<bool>,
    pub fmt_tooltip: Option<String>,
    pub fmt_underline_style: Option<UnderlineStyle>,
    pub fmt_vertical_alignment: Option<CharVerticalAlignment>,
}
62
/// Character-level formatting for a contiguous byte span. One per
/// [`FormatRun`]; one per [`ImageAnchor`]. Fields mirror the `fmt_*`
/// set on [`InlineSegment`] and on `FragmentElement` so values copy
/// across types verbatim.
///
/// The derived `Default` (every `Option` is `None`, `anchor_names` is
/// empty) is the format [`inline_segments_view`] attaches to text that no
/// run covers. Equality is the derived field-by-field comparison; it is
/// what [`coalesce_in_place`] uses to decide whether two runs can merge.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct CharacterFormat {
    // NOTE(review): `None` presumably means "attribute not set" — confirm
    // against the layout / export readers.
    pub font_family: Option<String>,
    pub font_point_size: Option<i64>,
    pub font_weight: Option<i64>,
    pub font_bold: Option<bool>,
    pub font_italic: Option<bool>,
    pub font_underline: Option<bool>,
    pub font_overline: Option<bool>,
    pub font_strikeout: Option<bool>,
    pub letter_spacing: Option<i64>,
    pub word_spacing: Option<i64>,
    pub anchor_href: Option<String>,
    pub anchor_names: Vec<String>,
    pub is_anchor: Option<bool>,
    pub tooltip: Option<String>,
    pub underline_style: Option<UnderlineStyle>,
    pub vertical_alignment: Option<CharVerticalAlignment>,
}
86
/// One run of identical character formatting inside a block. Byte offsets
/// are relative to the block's `plain_text` (Phase 1) or to the block's
/// rope range (Phase 2).
///
/// The span is half-open: `byte_start` is the first covered byte and
/// `byte_end` is one past the last. The list-level invariants (sorted,
/// non-empty, non-overlapping, coalesced, within the block text) are
/// checked by [`debug_assert_well_formed`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FormatRun {
    /// Inclusive start of the span, in bytes.
    pub byte_start: u32,
    /// Exclusive end of the span, in bytes.
    pub byte_end: u32,
    /// Formatting applied to every byte of the span.
    pub format: CharacterFormat,
}
96
/// An image embedded at a specific byte position inside a block. In
/// Phase 1 the byte position is an index into the block's `plain_text`;
/// in Phase 2 it points at the U+FFFC sentinel character in the rope.
///
/// Images carry their own [`CharacterFormat`] because vertical alignment
/// and anchor metadata apply per inline run.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ImageAnchor {
    /// Byte position of the image within the block (see the type docs for
    /// the Phase 1 / Phase 2 meaning).
    pub byte_offset: u32,
    /// Image name; copied into `InlineContent::Image::name` by
    /// [`inline_segments_view`].
    pub name: String,
    // NOTE(review): the units of `width` / `height` and the scale of
    // `quality` are not visible in this module — confirm.
    pub width: i64,
    pub height: i64,
    pub quality: i64,
    /// Character formatting attached to the image itself.
    pub format: CharacterFormat,
}
112
113/// Debug-only invariant check. Run from `debug_assert!` callsites in
114/// the use cases that mutate format runs. Cheap: O(n) where n is the
115/// run count (typically < 100 per block in real prose).
116///
117/// # Invariants
118/// 1. Runs are sorted by `byte_start` ascending.
119/// 2. Each run has `byte_start < byte_end`.
120/// 3. Runs are non-overlapping: `runs[i].byte_end <= runs[i+1].byte_start`.
121/// 4. The last run's `byte_end` does not exceed `block_text_len`.
122/// 5. Adjacent runs with identical format are coalesced (no two
123///    consecutive runs satisfy `byte_end == next.byte_start &&
124///    format == next.format`).
125pub fn debug_assert_well_formed(runs: &[FormatRun], block_text_len: usize) {
126    if runs.is_empty() {
127        return;
128    }
129    for run in runs {
130        debug_assert!(
131            run.byte_start < run.byte_end,
132            "format run is empty or reversed: {run:?}"
133        );
134    }
135    for i in 0..runs.len() - 1 {
136        debug_assert!(
137            runs[i].byte_end <= runs[i + 1].byte_start,
138            "format runs overlap or unsorted at {i}: {:?} then {:?}",
139            runs[i],
140            runs[i + 1]
141        );
142        debug_assert!(
143            !(runs[i].byte_end == runs[i + 1].byte_start && runs[i].format == runs[i + 1].format),
144            "adjacent identical format runs at {i} not coalesced: {:?}",
145            runs[i]
146        );
147    }
148    debug_assert!(
149        runs.last().unwrap().byte_end as usize <= block_text_len,
150        "last format run {:?} exceeds block text len {block_text_len}",
151        runs.last().unwrap()
152    );
153}
154
155/// Merge adjacent runs that have identical formatting. O(n).
156pub fn coalesce_in_place(runs: &mut Vec<FormatRun>) {
157    if runs.len() < 2 {
158        return;
159    }
160    let mut write = 0usize;
161    for read in 1..runs.len() {
162        if runs[write].byte_end == runs[read].byte_start && runs[write].format == runs[read].format
163        {
164            runs[write].byte_end = runs[read].byte_end;
165        } else {
166            write += 1;
167            if write != read {
168                runs[write] = runs[read].clone();
169            }
170        }
171    }
172    runs.truncate(write + 1);
173}
174
175/// Replace the runs covering `range` with `replacement`, preserving the
176/// invariants. Runs that straddle the range boundary are clipped on
177/// either side; runs fully contained are removed.
178///
179/// The replacement byte ranges must lie within `range` and themselves
180/// be well-formed (sorted, non-overlapping). The function does NOT
181/// shift bytes after `range.end` — callers wanting to splice in a
182/// different-length text must call [`shift_after`] first or after,
183/// depending on whether the text length is changing.
184pub fn splice_range(
185    runs: &mut Vec<FormatRun>,
186    range: std::ops::Range<u32>,
187    replacement: Vec<FormatRun>,
188) {
189    debug_assert!(range.start <= range.end);
190    for r in &replacement {
191        debug_assert!(r.byte_start >= range.start && r.byte_end <= range.end);
192    }
193
194    let mut result: Vec<FormatRun> = Vec::with_capacity(runs.len() + replacement.len());
195
196    // Keep / clip everything strictly before range.start.
197    for run in runs.iter() {
198        if run.byte_end <= range.start {
199            result.push(run.clone());
200        } else if run.byte_start < range.start {
201            // Run straddles range.start: keep the left part.
202            result.push(FormatRun {
203                byte_start: run.byte_start,
204                byte_end: range.start,
205                format: run.format.clone(),
206            });
207        }
208    }
209
210    // Insert the replacement runs.
211    result.extend(replacement);
212
213    // Keep / clip everything starting at or after range.end.
214    for run in runs.iter() {
215        if run.byte_start >= range.end {
216            result.push(run.clone());
217        } else if run.byte_end > range.end {
218            // Run straddles range.end: keep the right part.
219            result.push(FormatRun {
220                byte_start: range.end,
221                byte_end: run.byte_end,
222                format: run.format.clone(),
223            });
224        }
225    }
226
227    coalesce_in_place(&mut result);
228    *runs = result;
229}
230
231/// Capture the slice of `runs` that intersects `[start..end)`, clipped
232/// to those bounds. Used by hand-rolled-inverse undo for format-only
233/// edits: callers capture this BEFORE calling [`splice_range`], and
234/// on undo splice the captured runs back into the same byte range to
235/// restore the prior state without paying the cost of a full
236/// `RopeStoreSnapshot`.
237///
238/// Gaps in the original runs (positions inside `[start..end)` with no
239/// formatting) become gaps in the captured output too — the undo
240/// splice preserves them faithfully.
241pub fn capture_runs_in_range(runs: &[FormatRun], start: u32, end: u32) -> Vec<FormatRun> {
242    let mut out = Vec::new();
243    for run in runs {
244        if run.byte_end <= start || run.byte_start >= end {
245            continue;
246        }
247        let clipped_start = std::cmp::max(run.byte_start, start);
248        let clipped_end = std::cmp::min(run.byte_end, end);
249        if clipped_start < clipped_end {
250            out.push(FormatRun {
251                byte_start: clipped_start,
252                byte_end: clipped_end,
253                format: run.format.clone(),
254            });
255        }
256    }
257    out
258}
259
260/// Capture the `(byte_offset, format)` pairs for every image anchor
261/// inside `[start..end)`. Used together with [`capture_runs_in_range`]
262/// by hand-rolled-inverse undo for format-only edits.
263pub fn capture_image_formats_in_range(
264    images: &[ImageAnchor],
265    start: u32,
266    end: u32,
267) -> Vec<(u32, CharacterFormat)> {
268    let mut out = Vec::new();
269    for img in images {
270        if img.byte_offset >= start && img.byte_offset < end {
271            out.push((img.byte_offset, img.format.clone()));
272        }
273    }
274    out
275}
276
277/// Shift the byte offsets of every run whose `byte_start >= threshold`
278/// by `delta`. Used after a text insert/delete to keep downstream runs
279/// in sync with the new block text. Runs strictly before the threshold
280/// are unaffected; runs that straddle the threshold are left alone
281/// (the caller should have spliced them first).
282///
283/// Panics in debug mode if `delta` would underflow a run's offset.
284pub fn shift_after(runs: &mut [FormatRun], threshold: u32, delta: i32) {
285    for run in runs.iter_mut() {
286        if run.byte_start >= threshold {
287            let new_start = (run.byte_start as i64) + (delta as i64);
288            let new_end = (run.byte_end as i64) + (delta as i64);
289            debug_assert!(new_start >= 0 && new_end >= new_start);
290            run.byte_start = new_start as u32;
291            run.byte_end = new_end as u32;
292        }
293    }
294}
295
/// Build a stable per-fragment id from a block id and a byte offset
/// inside that block.
///
/// The value populates the `element_id` field in
/// `FragmentContent::{Text, Image}` (the public layout-engine type), giving
/// callers a stable handle across renders even though the underlying
/// [`InlineSegment`]s are never stored. Identical `(block_id, byte_start)`
/// pairs always map to the same id; a segment whose `byte_start` changes
/// (e.g. after an upstream insert) gets a different id.
///
/// Bit layout of the returned `u64`:
/// * bit 63 — always zero, so the value fits in the positive `i64` range
///   (public DTOs expose `element_id` as `i64`);
/// * bit 62 — synth tag, so synthesized ids never collide with real entity
///   ids issued by the store's counter, which start at 1 and grow upward;
/// * bits 32..62 — the low 30 bits of `block_id` (higher bits are masked
///   off);
/// * bits 0..32 — `byte_start`.
pub fn synth_element_id(block_id: u64, byte_start: u32) -> u64 {
    const SYNTH_TAG: u64 = 1 << 62;
    const BLOCK_ID_MASK: u64 = (1 << 30) - 1;
    const BLOCK_ID_SHIFT: u32 = 32;

    let block_bits = (block_id & BLOCK_ID_MASK) << BLOCK_ID_SHIFT;
    SYNTH_TAG | block_bits | u64::from(byte_start)
}
315
316/// Same as `shift_after` for image anchors. Anchors AT the threshold are
317/// shifted (treated as part of the inserted region's right side).
318pub fn shift_images_after(images: &mut [ImageAnchor], threshold: u32, delta: i32) {
319    for img in images.iter_mut() {
320        if img.byte_offset >= threshold {
321            let new_off = (img.byte_offset as i64) + (delta as i64);
322            debug_assert!(new_off >= 0);
323            img.byte_offset = new_off as u32;
324        }
325    }
326}
327
328// ─────────────────────────────────────────────────────────────────────
329// Composite helpers used by writer use cases. These keep the per-block
330// run / image vectors well-formed under insert / delete / split.
331// ─────────────────────────────────────────────────────────────────────
332
333/// Apply an "insert `inserted_bytes` of text at `byte_offset`" mutation
334/// to a block's runs in place. Runs strictly before the offset are
335/// unchanged; runs strictly after are shifted by +inserted_bytes; runs
336/// that straddle the offset are extended (the inserted text inherits
337/// the surrounding run's format — Qt / ProseMirror convention).
338pub fn shift_runs_for_insert(runs: &mut [FormatRun], byte_offset: u32, inserted_bytes: u32) {
339    if inserted_bytes == 0 {
340        return;
341    }
342    for run in runs.iter_mut() {
343        if run.byte_start >= byte_offset {
344            run.byte_start += inserted_bytes;
345            run.byte_end += inserted_bytes;
346        } else if run.byte_end >= byte_offset {
347            // Run straddles the insertion point, or its right edge sits
348            // exactly on it. In both cases the inserted text inherits
349            // this run's format (Qt convention).
350            run.byte_end += inserted_bytes;
351        }
352    }
353}
354
355/// Apply a "delete byte range `[byte_start..byte_end)`" mutation to a
356/// block's runs. Splices the range with empty replacement (clipping
357/// straddling runs) and shifts everything past `byte_end` back by the
358/// deleted length. Adjacent runs that end up equal-format are coalesced.
359pub fn shift_runs_for_delete(runs: &mut Vec<FormatRun>, byte_start: u32, byte_end: u32) {
360    if byte_end <= byte_start {
361        return;
362    }
363    splice_range(runs, byte_start..byte_end, Vec::new());
364    let delta = (byte_end - byte_start) as i32;
365    shift_after(runs, byte_end, -delta);
366    // The shift can make a left-clipped run abut a shifted trailing run
367    // with identical format; coalesce once more to restore the invariant.
368    coalesce_in_place(runs);
369}
370
371/// Apply an "insert" shift to a block's image anchors. Anchors at or
372/// past the offset move forward by `inserted_bytes`.
373pub fn shift_images_for_insert(images: &mut [ImageAnchor], byte_offset: u32, inserted_bytes: u32) {
374    if inserted_bytes == 0 {
375        return;
376    }
377    for img in images.iter_mut() {
378        if img.byte_offset >= byte_offset {
379            img.byte_offset += inserted_bytes;
380        }
381    }
382}
383
384/// Apply a "delete" mutation to a block's image anchors. Anchors whose
385/// `byte_offset` falls inside `[byte_start..byte_end)` are removed;
386/// anchors at or past `byte_end` shift back by the deleted length.
387/// Returns the number of anchors removed.
388pub fn shift_images_for_delete(
389    images: &mut Vec<ImageAnchor>,
390    byte_start: u32,
391    byte_end: u32,
392) -> usize {
393    if byte_end <= byte_start {
394        return 0;
395    }
396    let before = images.len();
397    images.retain(|i| !(i.byte_offset >= byte_start && i.byte_offset < byte_end));
398    let removed = before - images.len();
399    let delta = (byte_end - byte_start) as i32;
400    shift_images_after(images, byte_end, -delta);
401    removed
402}
403
404/// Translate a logical character offset (counting text characters AND
405/// image positions interleaved by their `byte_offset`) into a UTF-8
406/// byte offset within `plain_text`. Used by writer use cases to map a
407/// document-space char position to the byte position where text edits
408/// should land in `block.plain_text`.
409///
410/// Images contribute 1 logical character but 0 bytes in `plain_text`.
411/// Images at the same byte_offset are visited in their stored order.
412pub fn logical_offset_to_byte(plain_text: &str, images: &[ImageAnchor], char_offset: i64) -> u32 {
413    if char_offset <= 0 {
414        return 0;
415    }
416    let mut logical: i64 = 0;
417    let mut images_consumed = 0usize;
418    for (b, _) in plain_text.char_indices() {
419        while images_consumed < images.len() && images[images_consumed].byte_offset <= b as u32 {
420            if logical == char_offset {
421                return b as u32;
422            }
423            logical += 1;
424            images_consumed += 1;
425        }
426        if logical == char_offset {
427            return b as u32;
428        }
429        logical += 1;
430    }
431    let plain_len = plain_text.len() as u32;
432    while images_consumed < images.len() {
433        if logical == char_offset {
434            return plain_len;
435        }
436        logical += 1;
437        images_consumed += 1;
438    }
439    plain_len
440}
441
442/// Split a block's format runs at `byte_offset`. The returned right-hand
443/// vector has its run offsets re-based so they start at byte 0 of the
444/// new (right) block. Straddling runs are split with their `format`
445/// cloned to both halves.
446pub fn split_runs_at(runs: &[FormatRun], byte_offset: u32) -> (Vec<FormatRun>, Vec<FormatRun>) {
447    let mut left = Vec::new();
448    let mut right = Vec::new();
449    for run in runs {
450        if run.byte_end <= byte_offset {
451            left.push(run.clone());
452        } else if run.byte_start >= byte_offset {
453            right.push(FormatRun {
454                byte_start: run.byte_start - byte_offset,
455                byte_end: run.byte_end - byte_offset,
456                format: run.format.clone(),
457            });
458        } else {
459            left.push(FormatRun {
460                byte_start: run.byte_start,
461                byte_end: byte_offset,
462                format: run.format.clone(),
463            });
464            right.push(FormatRun {
465                byte_start: 0,
466                byte_end: run.byte_end - byte_offset,
467                format: run.format.clone(),
468            });
469        }
470    }
471    (left, right)
472}
473
474/// Split block image anchors at `byte_offset`. Anchors at exactly
475/// `byte_offset` go to the right half (rebased to offset 0).
476pub fn split_images_at(
477    images: &[ImageAnchor],
478    byte_offset: u32,
479) -> (Vec<ImageAnchor>, Vec<ImageAnchor>) {
480    let mut left = Vec::new();
481    let mut right = Vec::new();
482    for img in images {
483        if img.byte_offset < byte_offset {
484            left.push(img.clone());
485        } else {
486            let mut new = img.clone();
487            new.byte_offset -= byte_offset;
488            right.push(new);
489        }
490    }
491    (left, right)
492}
493
494// ─────────────────────────────────────────────────────────────────────
495// View synthesis: build a Vec<InlineSegment> from format_runs + images.
496// ─────────────────────────────────────────────────────────────────────
497
498/// Copy the `fmt_*` fields of an `InlineSegment` into a `CharacterFormat`.
499pub fn character_format_from_segment(seg: &InlineSegment) -> CharacterFormat {
500    CharacterFormat {
501        font_family: seg.fmt_font_family.clone(),
502        font_point_size: seg.fmt_font_point_size,
503        font_weight: seg.fmt_font_weight,
504        font_bold: seg.fmt_font_bold,
505        font_italic: seg.fmt_font_italic,
506        font_underline: seg.fmt_font_underline,
507        font_overline: seg.fmt_font_overline,
508        font_strikeout: seg.fmt_font_strikeout,
509        letter_spacing: seg.fmt_letter_spacing,
510        word_spacing: seg.fmt_word_spacing,
511        anchor_href: seg.fmt_anchor_href.clone(),
512        anchor_names: seg.fmt_anchor_names.clone(),
513        is_anchor: seg.fmt_is_anchor,
514        tooltip: seg.fmt_tooltip.clone(),
515        underline_style: seg.fmt_underline_style.clone(),
516        vertical_alignment: seg.fmt_vertical_alignment.clone(),
517    }
518}
519
520/// Apply a `CharacterFormat` onto an `InlineSegment`'s fmt_* fields.
521pub fn apply_character_format_to_segment(seg: &mut InlineSegment, fmt: &CharacterFormat) {
522    seg.fmt_font_family = fmt.font_family.clone();
523    seg.fmt_font_point_size = fmt.font_point_size;
524    seg.fmt_font_weight = fmt.font_weight;
525    seg.fmt_font_bold = fmt.font_bold;
526    seg.fmt_font_italic = fmt.font_italic;
527    seg.fmt_font_underline = fmt.font_underline;
528    seg.fmt_font_overline = fmt.font_overline;
529    seg.fmt_font_strikeout = fmt.font_strikeout;
530    seg.fmt_letter_spacing = fmt.letter_spacing;
531    seg.fmt_word_spacing = fmt.word_spacing;
532    seg.fmt_anchor_href = fmt.anchor_href.clone();
533    seg.fmt_anchor_names = fmt.anchor_names.clone();
534    seg.fmt_is_anchor = fmt.is_anchor;
535    seg.fmt_tooltip = fmt.tooltip.clone();
536    seg.fmt_underline_style = fmt.underline_style.clone();
537    seg.fmt_vertical_alignment = fmt.vertical_alignment.clone();
538}
539
540/// Synthesize a `Vec<InlineSegment>` view of a block from its
541/// `plain_text`, `format_runs`, and `block_images`. Returns segments
542/// in document order: a Text segment per format run (with a fallback
543/// default-format segment for any uncovered bytes), and an Image
544/// segment per anchor at its byte offset.
545///
546/// The canonical reader-side accessor for per-segment data — there is
547/// no persistent inline-element table; this view is computed fresh
548/// each call.
549pub fn inline_segments_view(
550    plain_text: &str,
551    runs: &[FormatRun],
552    images: &[ImageAnchor],
553) -> Vec<InlineSegment> {
554    let mut out: Vec<InlineSegment> = Vec::new();
555    let bytes = plain_text.as_bytes();
556
557    let mut img_iter = images.iter().peekable();
558    let mut cursor: u32 = 0;
559
560    let emit_text =
561        |out: &mut Vec<InlineSegment>, bytes: &[u8], start: u32, end: u32, fmt: CharacterFormat| {
562            if start >= end {
563                return;
564            }
565            let slice = &bytes[start as usize..end as usize];
566            let s = std::str::from_utf8(slice)
567                .expect("block plain_text must be valid UTF-8")
568                .to_string();
569            let mut seg = InlineSegment {
570                content: InlineContent::Text(s),
571                ..Default::default()
572            };
573            apply_character_format_to_segment(&mut seg, &fmt);
574            out.push(seg);
575        };
576
577    let emit_image = |out: &mut Vec<InlineSegment>, anchor: &ImageAnchor| {
578        let mut seg = InlineSegment {
579            content: InlineContent::Image {
580                name: anchor.name.clone(),
581                width: anchor.width,
582                height: anchor.height,
583                quality: anchor.quality,
584            },
585            ..Default::default()
586        };
587        apply_character_format_to_segment(&mut seg, &anchor.format);
588        out.push(seg);
589    };
590
591    for run in runs {
592        while let Some(img) = img_iter.peek() {
593            if img.byte_offset < run.byte_start {
594                emit_text(
595                    &mut out,
596                    bytes,
597                    cursor,
598                    img.byte_offset,
599                    CharacterFormat::default(),
600                );
601                emit_image(&mut out, img);
602                cursor = img.byte_offset;
603                img_iter.next();
604            } else {
605                break;
606            }
607        }
608
609        if cursor < run.byte_start {
610            emit_text(
611                &mut out,
612                bytes,
613                cursor,
614                run.byte_start,
615                CharacterFormat::default(),
616            );
617        }
618
619        emit_text(
620            &mut out,
621            bytes,
622            run.byte_start,
623            run.byte_end,
624            run.format.clone(),
625        );
626        cursor = run.byte_end;
627    }
628
629    for img in img_iter {
630        if img.byte_offset > cursor {
631            emit_text(
632                &mut out,
633                bytes,
634                cursor,
635                img.byte_offset,
636                CharacterFormat::default(),
637            );
638            cursor = img.byte_offset;
639        }
640        emit_image(&mut out, img);
641    }
642
643    if (cursor as usize) < bytes.len() {
644        emit_text(
645            &mut out,
646            bytes,
647            cursor,
648            bytes.len() as u32,
649            CharacterFormat::default(),
650        );
651    }
652
653    out
654}
655
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a run over `[start, end)` whose only set attribute is
    /// `font_bold`.
    fn make_run(start: u32, end: u32, bold: bool) -> FormatRun {
        let format = CharacterFormat {
            font_bold: Some(bold),
            ..Default::default()
        };
        FormatRun {
            byte_start: start,
            byte_end: end,
            format,
        }
    }

    /// An empty run list satisfies the invariants for any text length.
    #[test]
    fn empty_runs_are_well_formed() {
        let no_runs: [FormatRun; 0] = [];
        debug_assert_well_formed(&no_runs, 0);
        debug_assert_well_formed(&no_runs, 100);
    }

    /// Touching runs with equal format collapse into one.
    #[test]
    fn coalesce_merges_adjacent_equal_runs() {
        let mut runs = vec![
            make_run(0, 5, true),
            make_run(5, 10, true),
            make_run(10, 15, false),
        ];
        coalesce_in_place(&mut runs);
        assert_eq!(runs.len(), 2);
        assert_eq!(runs[0].byte_end, 10);
    }

    /// Equal-format runs separated by a gap stay separate.
    #[test]
    fn coalesce_leaves_disjoint_runs_alone() {
        let mut runs = vec![make_run(0, 5, true), make_run(7, 10, true)];
        coalesce_in_place(&mut runs);
        assert_eq!(runs.len(), 2);
    }

    /// A run wider than the spliced range is clipped on both sides.
    #[test]
    fn splice_range_clips_straddling_runs() {
        let mut runs = vec![make_run(0, 20, true)];
        let replacement = vec![make_run(5, 15, false)];
        splice_range(&mut runs, 5..15, replacement);
        assert_eq!(runs.len(), 3);
        assert_eq!(runs[0].byte_end, 5);
        assert_eq!(runs[1].format.font_bold, Some(false));
        assert_eq!(runs[2].byte_start, 15);
    }

    /// Splicing with no replacement leaves a formatting gap: the bold runs
    /// at 0..5 and 10..15 do not touch, so they are not coalesced.
    #[test]
    fn splice_range_empty_replacement_removes_inner_runs() {
        let mut runs = vec![
            make_run(0, 5, true),
            make_run(5, 10, false),
            make_run(10, 15, true),
        ];
        splice_range(&mut runs, 5..10, Vec::new());
        assert_eq!(runs.len(), 2);
        assert_eq!(runs[0].byte_end, 5);
        assert_eq!(runs[1].byte_start, 10);
    }

    /// Only runs starting at or after the threshold move.
    #[test]
    fn shift_after_moves_downstream() {
        let mut runs = vec![make_run(0, 5, true), make_run(10, 15, false)];
        shift_after(&mut runs, 5, 3);
        assert_eq!(runs[0].byte_start, 0); // unchanged
        assert_eq!(runs[1].byte_start, 13);
        assert_eq!(runs[1].byte_end, 18);
    }
}