1use crate::entities::{CharVerticalAlignment, UnderlineStyle};
18use serde::{Deserialize, Serialize};
19
20#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq, Eq)]
22pub enum InlineContent {
23 #[default]
24 Empty,
25 Text(String),
26 Image {
27 name: String,
28 width: i64,
29 height: i64,
30 quality: i64,
31 },
32}
33
34#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
43pub struct InlineSegment {
44 pub content: InlineContent,
45 pub fmt_font_family: Option<String>,
46 pub fmt_font_point_size: Option<i64>,
47 pub fmt_font_weight: Option<i64>,
48 pub fmt_font_bold: Option<bool>,
49 pub fmt_font_italic: Option<bool>,
50 pub fmt_font_underline: Option<bool>,
51 pub fmt_font_overline: Option<bool>,
52 pub fmt_font_strikeout: Option<bool>,
53 pub fmt_letter_spacing: Option<i64>,
54 pub fmt_word_spacing: Option<i64>,
55 pub fmt_anchor_href: Option<String>,
56 pub fmt_anchor_names: Vec<String>,
57 pub fmt_is_anchor: Option<bool>,
58 pub fmt_tooltip: Option<String>,
59 pub fmt_underline_style: Option<UnderlineStyle>,
60 pub fmt_vertical_alignment: Option<CharVerticalAlignment>,
61}
62
63#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
68pub struct CharacterFormat {
69 pub font_family: Option<String>,
70 pub font_point_size: Option<i64>,
71 pub font_weight: Option<i64>,
72 pub font_bold: Option<bool>,
73 pub font_italic: Option<bool>,
74 pub font_underline: Option<bool>,
75 pub font_overline: Option<bool>,
76 pub font_strikeout: Option<bool>,
77 pub letter_spacing: Option<i64>,
78 pub word_spacing: Option<i64>,
79 pub anchor_href: Option<String>,
80 pub anchor_names: Vec<String>,
81 pub is_anchor: Option<bool>,
82 pub tooltip: Option<String>,
83 pub underline_style: Option<UnderlineStyle>,
84 pub vertical_alignment: Option<CharVerticalAlignment>,
85}
86
87#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
91pub struct FormatRun {
92 pub byte_start: u32,
93 pub byte_end: u32,
94 pub format: CharacterFormat,
95}
96
97#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
104pub struct ImageAnchor {
105 pub byte_offset: u32,
106 pub name: String,
107 pub width: i64,
108 pub height: i64,
109 pub quality: i64,
110 pub format: CharacterFormat,
111}
112
113pub fn debug_assert_well_formed(runs: &[FormatRun], block_text_len: usize) {
126 if runs.is_empty() {
127 return;
128 }
129 for run in runs {
130 debug_assert!(
131 run.byte_start < run.byte_end,
132 "format run is empty or reversed: {run:?}"
133 );
134 }
135 for i in 0..runs.len() - 1 {
136 debug_assert!(
137 runs[i].byte_end <= runs[i + 1].byte_start,
138 "format runs overlap or unsorted at {i}: {:?} then {:?}",
139 runs[i],
140 runs[i + 1]
141 );
142 debug_assert!(
143 !(runs[i].byte_end == runs[i + 1].byte_start && runs[i].format == runs[i + 1].format),
144 "adjacent identical format runs at {i} not coalesced: {:?}",
145 runs[i]
146 );
147 }
148 debug_assert!(
149 runs.last().unwrap().byte_end as usize <= block_text_len,
150 "last format run {:?} exceeds block text len {block_text_len}",
151 runs.last().unwrap()
152 );
153}
154
155pub fn coalesce_in_place(runs: &mut Vec<FormatRun>) {
157 if runs.len() < 2 {
158 return;
159 }
160 let mut write = 0usize;
161 for read in 1..runs.len() {
162 if runs[write].byte_end == runs[read].byte_start && runs[write].format == runs[read].format
163 {
164 runs[write].byte_end = runs[read].byte_end;
165 } else {
166 write += 1;
167 if write != read {
168 runs[write] = runs[read].clone();
169 }
170 }
171 }
172 runs.truncate(write + 1);
173}
174
175pub fn splice_range(
185 runs: &mut Vec<FormatRun>,
186 range: std::ops::Range<u32>,
187 replacement: Vec<FormatRun>,
188) {
189 debug_assert!(range.start <= range.end);
190 for r in &replacement {
191 debug_assert!(r.byte_start >= range.start && r.byte_end <= range.end);
192 }
193
194 let mut result: Vec<FormatRun> = Vec::with_capacity(runs.len() + replacement.len());
195
196 for run in runs.iter() {
198 if run.byte_end <= range.start {
199 result.push(run.clone());
200 } else if run.byte_start < range.start {
201 result.push(FormatRun {
203 byte_start: run.byte_start,
204 byte_end: range.start,
205 format: run.format.clone(),
206 });
207 }
208 }
209
210 result.extend(replacement);
212
213 for run in runs.iter() {
215 if run.byte_start >= range.end {
216 result.push(run.clone());
217 } else if run.byte_end > range.end {
218 result.push(FormatRun {
220 byte_start: range.end,
221 byte_end: run.byte_end,
222 format: run.format.clone(),
223 });
224 }
225 }
226
227 coalesce_in_place(&mut result);
228 *runs = result;
229}
230
231pub fn capture_runs_in_range(runs: &[FormatRun], start: u32, end: u32) -> Vec<FormatRun> {
242 let mut out = Vec::new();
243 for run in runs {
244 if run.byte_end <= start || run.byte_start >= end {
245 continue;
246 }
247 let clipped_start = std::cmp::max(run.byte_start, start);
248 let clipped_end = std::cmp::min(run.byte_end, end);
249 if clipped_start < clipped_end {
250 out.push(FormatRun {
251 byte_start: clipped_start,
252 byte_end: clipped_end,
253 format: run.format.clone(),
254 });
255 }
256 }
257 out
258}
259
260pub fn capture_image_formats_in_range(
264 images: &[ImageAnchor],
265 start: u32,
266 end: u32,
267) -> Vec<(u32, CharacterFormat)> {
268 let mut out = Vec::new();
269 for img in images {
270 if img.byte_offset >= start && img.byte_offset < end {
271 out.push((img.byte_offset, img.format.clone()));
272 }
273 }
274 out
275}
276
277pub fn shift_after(runs: &mut [FormatRun], threshold: u32, delta: i32) {
285 for run in runs.iter_mut() {
286 if run.byte_start >= threshold {
287 let new_start = (run.byte_start as i64) + (delta as i64);
288 let new_end = (run.byte_end as i64) + (delta as i64);
289 debug_assert!(new_start >= 0 && new_end >= new_start);
290 run.byte_start = new_start as u32;
291 run.byte_end = new_end as u32;
292 }
293 }
294}
295
/// Packs a block id and a byte offset into a single tagged 64-bit id.
///
/// Bit layout of the result:
///
/// * bits 0..=31  — `byte_start`;
/// * bits 32..=61 — the low 30 bits of `block_id` (higher bits are discarded);
/// * bit 62       — always set (the tag);
/// * bit 63       — always clear.
pub fn synth_element_id(block_id: u64, byte_start: u32) -> u64 {
    const SYNTH_TAG: u64 = 1 << 62;
    const BLOCK_ID_MASK: u64 = 0x3FFF_FFFF;

    let block_bits = (block_id & BLOCK_ID_MASK) << 32;
    SYNTH_TAG | block_bits | u64::from(byte_start)
}
315
316pub fn shift_images_after(images: &mut [ImageAnchor], threshold: u32, delta: i32) {
319 for img in images.iter_mut() {
320 if img.byte_offset >= threshold {
321 let new_off = (img.byte_offset as i64) + (delta as i64);
322 debug_assert!(new_off >= 0);
323 img.byte_offset = new_off as u32;
324 }
325 }
326}
327
328pub fn shift_runs_for_insert(runs: &mut [FormatRun], byte_offset: u32, inserted_bytes: u32) {
339 if inserted_bytes == 0 {
340 return;
341 }
342 for run in runs.iter_mut() {
343 if run.byte_start >= byte_offset {
344 run.byte_start += inserted_bytes;
345 run.byte_end += inserted_bytes;
346 } else if run.byte_end >= byte_offset {
347 run.byte_end += inserted_bytes;
351 }
352 }
353}
354
355pub fn shift_runs_for_delete(runs: &mut Vec<FormatRun>, byte_start: u32, byte_end: u32) {
360 if byte_end <= byte_start {
361 return;
362 }
363 splice_range(runs, byte_start..byte_end, Vec::new());
364 let delta = (byte_end - byte_start) as i32;
365 shift_after(runs, byte_end, -delta);
366 coalesce_in_place(runs);
369}
370
371pub fn shift_images_for_insert(images: &mut [ImageAnchor], byte_offset: u32, inserted_bytes: u32) {
374 if inserted_bytes == 0 {
375 return;
376 }
377 for img in images.iter_mut() {
378 if img.byte_offset >= byte_offset {
379 img.byte_offset += inserted_bytes;
380 }
381 }
382}
383
384pub fn shift_images_for_delete(
389 images: &mut Vec<ImageAnchor>,
390 byte_start: u32,
391 byte_end: u32,
392) -> usize {
393 if byte_end <= byte_start {
394 return 0;
395 }
396 let before = images.len();
397 images.retain(|i| !(i.byte_offset >= byte_start && i.byte_offset < byte_end));
398 let removed = before - images.len();
399 let delta = (byte_end - byte_start) as i32;
400 shift_images_after(images, byte_end, -delta);
401 removed
402}
403
404pub fn logical_offset_to_byte(plain_text: &str, images: &[ImageAnchor], char_offset: i64) -> u32 {
413 if char_offset <= 0 {
414 return 0;
415 }
416 let mut logical: i64 = 0;
417 let mut images_consumed = 0usize;
418 for (b, _) in plain_text.char_indices() {
419 while images_consumed < images.len() && images[images_consumed].byte_offset <= b as u32 {
420 if logical == char_offset {
421 return b as u32;
422 }
423 logical += 1;
424 images_consumed += 1;
425 }
426 if logical == char_offset {
427 return b as u32;
428 }
429 logical += 1;
430 }
431 let plain_len = plain_text.len() as u32;
432 while images_consumed < images.len() {
433 if logical == char_offset {
434 return plain_len;
435 }
436 logical += 1;
437 images_consumed += 1;
438 }
439 plain_len
440}
441
442pub fn split_runs_at(runs: &[FormatRun], byte_offset: u32) -> (Vec<FormatRun>, Vec<FormatRun>) {
447 let mut left = Vec::new();
448 let mut right = Vec::new();
449 for run in runs {
450 if run.byte_end <= byte_offset {
451 left.push(run.clone());
452 } else if run.byte_start >= byte_offset {
453 right.push(FormatRun {
454 byte_start: run.byte_start - byte_offset,
455 byte_end: run.byte_end - byte_offset,
456 format: run.format.clone(),
457 });
458 } else {
459 left.push(FormatRun {
460 byte_start: run.byte_start,
461 byte_end: byte_offset,
462 format: run.format.clone(),
463 });
464 right.push(FormatRun {
465 byte_start: 0,
466 byte_end: run.byte_end - byte_offset,
467 format: run.format.clone(),
468 });
469 }
470 }
471 (left, right)
472}
473
474pub fn split_images_at(
477 images: &[ImageAnchor],
478 byte_offset: u32,
479) -> (Vec<ImageAnchor>, Vec<ImageAnchor>) {
480 let mut left = Vec::new();
481 let mut right = Vec::new();
482 for img in images {
483 if img.byte_offset < byte_offset {
484 left.push(img.clone());
485 } else {
486 let mut new = img.clone();
487 new.byte_offset -= byte_offset;
488 right.push(new);
489 }
490 }
491 (left, right)
492}
493
494pub fn character_format_from_segment(seg: &InlineSegment) -> CharacterFormat {
500 CharacterFormat {
501 font_family: seg.fmt_font_family.clone(),
502 font_point_size: seg.fmt_font_point_size,
503 font_weight: seg.fmt_font_weight,
504 font_bold: seg.fmt_font_bold,
505 font_italic: seg.fmt_font_italic,
506 font_underline: seg.fmt_font_underline,
507 font_overline: seg.fmt_font_overline,
508 font_strikeout: seg.fmt_font_strikeout,
509 letter_spacing: seg.fmt_letter_spacing,
510 word_spacing: seg.fmt_word_spacing,
511 anchor_href: seg.fmt_anchor_href.clone(),
512 anchor_names: seg.fmt_anchor_names.clone(),
513 is_anchor: seg.fmt_is_anchor,
514 tooltip: seg.fmt_tooltip.clone(),
515 underline_style: seg.fmt_underline_style.clone(),
516 vertical_alignment: seg.fmt_vertical_alignment.clone(),
517 }
518}
519
520pub fn apply_character_format_to_segment(seg: &mut InlineSegment, fmt: &CharacterFormat) {
522 seg.fmt_font_family = fmt.font_family.clone();
523 seg.fmt_font_point_size = fmt.font_point_size;
524 seg.fmt_font_weight = fmt.font_weight;
525 seg.fmt_font_bold = fmt.font_bold;
526 seg.fmt_font_italic = fmt.font_italic;
527 seg.fmt_font_underline = fmt.font_underline;
528 seg.fmt_font_overline = fmt.font_overline;
529 seg.fmt_font_strikeout = fmt.font_strikeout;
530 seg.fmt_letter_spacing = fmt.letter_spacing;
531 seg.fmt_word_spacing = fmt.word_spacing;
532 seg.fmt_anchor_href = fmt.anchor_href.clone();
533 seg.fmt_anchor_names = fmt.anchor_names.clone();
534 seg.fmt_is_anchor = fmt.is_anchor;
535 seg.fmt_tooltip = fmt.tooltip.clone();
536 seg.fmt_underline_style = fmt.underline_style.clone();
537 seg.fmt_vertical_alignment = fmt.vertical_alignment.clone();
538}
539
540pub fn inline_segments_view(
550 plain_text: &str,
551 runs: &[FormatRun],
552 images: &[ImageAnchor],
553) -> Vec<InlineSegment> {
554 let mut out: Vec<InlineSegment> = Vec::new();
555 let bytes = plain_text.as_bytes();
556
557 let mut img_iter = images.iter().peekable();
558 let mut cursor: u32 = 0;
559
560 let emit_text =
561 |out: &mut Vec<InlineSegment>, bytes: &[u8], start: u32, end: u32, fmt: CharacterFormat| {
562 if start >= end {
563 return;
564 }
565 let slice = &bytes[start as usize..end as usize];
566 let s = std::str::from_utf8(slice)
567 .expect("block plain_text must be valid UTF-8")
568 .to_string();
569 let mut seg = InlineSegment {
570 content: InlineContent::Text(s),
571 ..Default::default()
572 };
573 apply_character_format_to_segment(&mut seg, &fmt);
574 out.push(seg);
575 };
576
577 let emit_image = |out: &mut Vec<InlineSegment>, anchor: &ImageAnchor| {
578 let mut seg = InlineSegment {
579 content: InlineContent::Image {
580 name: anchor.name.clone(),
581 width: anchor.width,
582 height: anchor.height,
583 quality: anchor.quality,
584 },
585 ..Default::default()
586 };
587 apply_character_format_to_segment(&mut seg, &anchor.format);
588 out.push(seg);
589 };
590
591 for run in runs {
592 while let Some(img) = img_iter.peek() {
593 if img.byte_offset < run.byte_start {
594 emit_text(
595 &mut out,
596 bytes,
597 cursor,
598 img.byte_offset,
599 CharacterFormat::default(),
600 );
601 emit_image(&mut out, img);
602 cursor = img.byte_offset;
603 img_iter.next();
604 } else {
605 break;
606 }
607 }
608
609 if cursor < run.byte_start {
610 emit_text(
611 &mut out,
612 bytes,
613 cursor,
614 run.byte_start,
615 CharacterFormat::default(),
616 );
617 }
618
619 emit_text(
620 &mut out,
621 bytes,
622 run.byte_start,
623 run.byte_end,
624 run.format.clone(),
625 );
626 cursor = run.byte_end;
627 }
628
629 for img in img_iter {
630 if img.byte_offset > cursor {
631 emit_text(
632 &mut out,
633 bytes,
634 cursor,
635 img.byte_offset,
636 CharacterFormat::default(),
637 );
638 cursor = img.byte_offset;
639 }
640 emit_image(&mut out, img);
641 }
642
643 if (cursor as usize) < bytes.len() {
644 emit_text(
645 &mut out,
646 bytes,
647 cursor,
648 bytes.len() as u32,
649 CharacterFormat::default(),
650 );
651 }
652
653 out
654}
655
#[cfg(test)]
mod tests {
    use super::*;

    /// Test helper: a run over `s..e` whose only set attribute is `font_bold`.
    fn run(s: u32, e: u32, bold: bool) -> FormatRun {
        let format = CharacterFormat {
            font_bold: Some(bold),
            ..Default::default()
        };
        FormatRun {
            byte_start: s,
            byte_end: e,
            format,
        }
    }

    /// An empty run list is well formed for any text length.
    #[test]
    fn empty_runs_are_well_formed() {
        for text_len in [0, 100] {
            debug_assert_well_formed(&[], text_len);
        }
    }

    /// Touching runs with equal formats collapse into one; the differing run stays.
    #[test]
    fn coalesce_merges_adjacent_equal_runs() {
        let mut runs = vec![run(0, 5, true), run(5, 10, true), run(10, 15, false)];
        coalesce_in_place(&mut runs);
        assert_eq!(runs.len(), 2);
        assert_eq!(runs[0].byte_end, 10);
    }

    /// Equal formats separated by a gap are not merged.
    #[test]
    fn coalesce_leaves_disjoint_runs_alone() {
        let mut runs = vec![run(0, 5, true), run(7, 10, true)];
        coalesce_in_place(&mut runs);
        assert_eq!(runs.len(), 2);
    }

    /// A run crossing both edges of the spliced range is clipped on each side.
    #[test]
    fn splice_range_clips_straddling_runs() {
        let mut runs = vec![run(0, 20, true)];
        let replacement = vec![run(5, 15, false)];
        splice_range(&mut runs, 5..15, replacement);
        assert_eq!(runs.len(), 3);
        assert_eq!(runs[0].byte_end, 5);
        assert_eq!(runs[1].format.font_bold, Some(false));
        assert_eq!(runs[2].byte_start, 15);
    }

    /// Splicing in nothing removes the runs inside the range; offsets are not shifted, so
    /// the remaining runs do not touch and are not merged.
    #[test]
    fn splice_range_empty_replacement_removes_inner_runs() {
        let mut runs = vec![run(0, 5, true), run(5, 10, false), run(10, 15, true)];
        splice_range(&mut runs, 5..10, Vec::new());
        assert_eq!(runs.len(), 2);
        assert_eq!(runs[0].byte_end, 5);
        assert_eq!(runs[1].byte_start, 10);
    }

    /// Only runs starting at or after the threshold move.
    #[test]
    fn shift_after_moves_downstream() {
        let mut runs = vec![run(0, 5, true), run(10, 15, false)];
        shift_after(&mut runs, 5, 3);
        assert_eq!(runs[0].byte_start, 0);
        assert_eq!(runs[1].byte_start, 13);
        assert_eq!(runs[1].byte_end, 18);
    }
}