use crate::entities::{CharVerticalAlignment, UnderlineStyle};
use serde::{Deserialize, Serialize};
/// Payload of one inline segment: nothing, a piece of text, or an image.
#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq, Eq)]
pub enum InlineContent {
/// No payload; this is the variant produced by `Default`.
#[default]
Empty,
/// Text payload.
Text(String),
// NOTE(review): the units of `width`/`height` and the scale of `quality`
// are not visible in this file — confirm against the producer.
/// Image payload; the fields have the same names and types as the
/// corresponding fields of `ImageAnchor`.
Image {
name: String,
width: i64,
height: i64,
quality: i64,
},
}
/// One inline segment: a payload plus its character formatting.
///
/// The formatting is stored as flat `fmt_*` fields. Each one corresponds to
/// the field of `CharacterFormat` with the same name minus the `fmt_` prefix;
/// `character_format_from_segment` and `apply_character_format_to_segment`
/// copy between the two representations field by field.
// NOTE(review): `None` presumably means "attribute not set" — confirm.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
pub struct InlineSegment {
/// What this segment carries (text, image, or nothing).
pub content: InlineContent,
pub fmt_font_family: Option<String>,
pub fmt_font_point_size: Option<i64>,
pub fmt_font_weight: Option<i64>,
pub fmt_font_bold: Option<bool>,
pub fmt_font_italic: Option<bool>,
pub fmt_font_underline: Option<bool>,
pub fmt_font_overline: Option<bool>,
pub fmt_font_strikeout: Option<bool>,
pub fmt_letter_spacing: Option<i64>,
pub fmt_word_spacing: Option<i64>,
pub fmt_anchor_href: Option<String>,
pub fmt_anchor_names: Vec<String>,
pub fmt_is_anchor: Option<bool>,
pub fmt_tooltip: Option<String>,
pub fmt_underline_style: Option<UnderlineStyle>,
pub fmt_vertical_alignment: Option<CharVerticalAlignment>,
}
/// Character-level formatting attributes, grouped into one value.
///
/// This is the format carried by `FormatRun` and `ImageAnchor`. Two formats
/// are compared with `==` when deciding whether adjacent runs can be merged.
/// The `Default` value has every optional attribute unset and an empty
/// `anchor_names` list.
// NOTE(review): value ranges (e.g. the scale of `font_weight`, the units of
// the spacing fields) are not visible in this file — confirm before relying
// on them.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct CharacterFormat {
pub font_family: Option<String>,
pub font_point_size: Option<i64>,
pub font_weight: Option<i64>,
pub font_bold: Option<bool>,
pub font_italic: Option<bool>,
pub font_underline: Option<bool>,
pub font_overline: Option<bool>,
pub font_strikeout: Option<bool>,
pub letter_spacing: Option<i64>,
pub word_spacing: Option<i64>,
pub anchor_href: Option<String>,
pub anchor_names: Vec<String>,
pub is_anchor: Option<bool>,
pub tooltip: Option<String>,
pub underline_style: Option<UnderlineStyle>,
pub vertical_alignment: Option<CharVerticalAlignment>,
}
/// A `CharacterFormat` applied to the half-open byte range
/// `byte_start..byte_end` of a block's plain text.
///
/// `debug_assert_well_formed` spells out the invariants expected of a run
/// list: non-empty runs, sorted, non-overlapping, coalesced, and within the
/// text length.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FormatRun {
/// Inclusive start of the run, as a byte offset into the block text.
pub byte_start: u32,
/// Exclusive end of the run, as a byte offset into the block text.
pub byte_end: u32,
/// Formatting applied to every byte of the range.
pub format: CharacterFormat,
}
/// An inline image attached to a byte position of a block's plain text.
///
/// The image does not occupy any bytes of the text itself, but
/// `logical_offset_to_byte` counts each anchor as one logical position.
// NOTE(review): the units of `width`/`height` and the scale of `quality` are
// not visible in this file — confirm against the producer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ImageAnchor {
/// Byte offset in the block text at which the image sits.
pub byte_offset: u32,
pub name: String,
pub width: i64,
pub height: i64,
pub quality: i64,
/// Character format attached to the image itself.
pub format: CharacterFormat,
}
/// Checks, via `debug_assert!`, that `runs` is a well-formed run list for a
/// block whose plain text is `block_text_len` bytes long.
///
/// The checked invariants are:
/// * every run is non-empty (`byte_start < byte_end`);
/// * runs are sorted and do not overlap;
/// * touching neighbours never share the same format (they would have been
///   coalesced);
/// * the last run does not extend past the end of the text.
///
/// An empty list is always well formed. When debug assertions are disabled
/// the checks are not evaluated.
pub fn debug_assert_well_formed(runs: &[FormatRun], block_text_len: usize) {
    let tail = match runs.last() {
        Some(tail) => tail,
        None => return,
    };

    for run in runs {
        debug_assert!(
            run.byte_start < run.byte_end,
            "format run is empty or reversed: {run:?}"
        );
    }

    for (i, pair) in runs.windows(2).enumerate() {
        let (prev, next) = (&pair[0], &pair[1]);
        debug_assert!(
            prev.byte_end <= next.byte_start,
            "format runs overlap or unsorted at {i}: {:?} then {:?}",
            prev,
            next
        );
        debug_assert!(
            !(prev.byte_end == next.byte_start && prev.format == next.format),
            "adjacent identical format runs at {i} not coalesced: {:?}",
            prev
        );
    }

    debug_assert!(
        tail.byte_end as usize <= block_text_len,
        "last format run {:?} exceeds block text len {block_text_len}",
        tail
    );
}
/// Merges neighbouring runs that touch (`a.byte_end == b.byte_start`) and
/// carry equal formats, in place.
///
/// Runs separated by a gap, or with different formats, are left as they are.
/// The relative order of the surviving runs is unchanged.
pub fn coalesce_in_place(runs: &mut Vec<FormatRun>) {
    // `dedup_by` hands us (candidate, last kept element). When the candidate
    // continues the kept run we grow the kept run and report the candidate as
    // a duplicate so it is dropped.
    runs.dedup_by(|next, kept| {
        let mergeable = kept.byte_end == next.byte_start && kept.format == next.format;
        if mergeable {
            kept.byte_end = next.byte_end;
        }
        mergeable
    });
}
/// Replaces whatever formatting covers `range` with `replacement`.
///
/// Runs entirely outside `range` are kept. Runs straddling either edge are
/// clipped so that only the part outside `range` survives. `replacement` is
/// inserted between the surviving left and right parts, and the result is
/// coalesced before it is written back to `runs`.
///
/// Byte offsets are not shifted: the text length is assumed unchanged by this
/// call. In debug builds the function asserts that `range` is not reversed
/// and that every replacement run lies inside it.
pub fn splice_range(
    runs: &mut Vec<FormatRun>,
    range: std::ops::Range<u32>,
    replacement: Vec<FormatRun>,
) {
    debug_assert!(range.start <= range.end);
    for r in &replacement {
        debug_assert!(r.byte_start >= range.start && r.byte_end <= range.end);
    }

    let capacity = runs.len() + replacement.len();

    // Everything that ends up left of the spliced range.
    let head = runs.iter().filter_map(|run| {
        if run.byte_end <= range.start {
            Some(run.clone())
        } else if run.byte_start < range.start {
            Some(FormatRun {
                byte_start: run.byte_start,
                byte_end: range.start,
                format: run.format.clone(),
            })
        } else {
            None
        }
    });

    // Everything that ends up right of the spliced range.
    let tail = runs.iter().filter_map(|run| {
        if run.byte_start >= range.end {
            Some(run.clone())
        } else if run.byte_end > range.end {
            Some(FormatRun {
                byte_start: range.end,
                byte_end: run.byte_end,
                format: run.format.clone(),
            })
        } else {
            None
        }
    });

    let mut merged: Vec<FormatRun> = Vec::with_capacity(capacity);
    merged.extend(head);
    merged.extend(replacement);
    merged.extend(tail);

    coalesce_in_place(&mut merged);
    *runs = merged;
}
/// Returns copies of the parts of `runs` that fall inside `start..end`.
///
/// Each returned run is clipped to the range and keeps its original format
/// and absolute byte offsets. Runs that do not intersect the range, or whose
/// clipped span would be empty, are omitted.
pub fn capture_runs_in_range(runs: &[FormatRun], start: u32, end: u32) -> Vec<FormatRun> {
    runs.iter()
        .filter(|run| run.byte_end > start && run.byte_start < end)
        .filter_map(|run| {
            let lo = run.byte_start.max(start);
            let hi = run.byte_end.min(end);
            // Guards against degenerate (empty) input runs inside the range.
            (lo < hi).then(|| FormatRun {
                byte_start: lo,
                byte_end: hi,
                format: run.format.clone(),
            })
        })
        .collect()
}
/// Collects `(byte_offset, format)` for every image anchored inside the
/// half-open range `start..end`, in the order the anchors appear in `images`.
pub fn capture_image_formats_in_range(
    images: &[ImageAnchor],
    start: u32,
    end: u32,
) -> Vec<(u32, CharacterFormat)> {
    let wanted = start..end;
    images
        .iter()
        .filter(|img| wanted.contains(&img.byte_offset))
        .map(|img| (img.byte_offset, img.format.clone()))
        .collect()
}
/// Moves every run whose `byte_start` is at or past `threshold` by `delta`
/// bytes (negative values move runs towards the start).
///
/// Runs that begin before `threshold` are not touched, even if they extend
/// past it. In debug builds the shifted start is asserted to be non-negative
/// and not greater than the shifted end.
pub fn shift_after(runs: &mut [FormatRun], threshold: u32, delta: i32) {
    let offset = i64::from(delta);
    for run in runs.iter_mut().filter(|run| run.byte_start >= threshold) {
        // Widen to i64 so a negative delta cannot wrap during the addition.
        let new_start = i64::from(run.byte_start) + offset;
        let new_end = i64::from(run.byte_end) + offset;
        debug_assert!(new_start >= 0 && new_end >= new_start);
        run.byte_start = new_start as u32;
        run.byte_end = new_end as u32;
    }
}
/// Packs a block id and a byte offset into one tagged 64-bit identifier.
///
/// Bit layout of the result:
/// * bit 62 — always set (the tag);
/// * bits 32..=61 — the low 30 bits of `block_id` (higher bits are dropped);
/// * bits 0..=31 — `byte_start`.
pub fn synth_element_id(block_id: u64, byte_start: u32) -> u64 {
    const SYNTH_TAG: u64 = 1 << 62;
    const BLOCK_ID_MASK: u64 = (1 << 30) - 1;
    const BLOCK_ID_SHIFT: u32 = 32;

    let block_bits = (block_id & BLOCK_ID_MASK) << BLOCK_ID_SHIFT;
    SYNTH_TAG | block_bits | u64::from(byte_start)
}
/// Moves every image anchored at or past `threshold` by `delta` bytes
/// (negative values move anchors towards the start).
///
/// In debug builds the shifted offset is asserted to be non-negative.
pub fn shift_images_after(images: &mut [ImageAnchor], threshold: u32, delta: i32) {
    let offset = i64::from(delta);
    for img in images.iter_mut().filter(|img| img.byte_offset >= threshold) {
        // Widen to i64 so a negative delta cannot wrap during the addition.
        let new_off = i64::from(img.byte_offset) + offset;
        debug_assert!(new_off >= 0);
        img.byte_offset = new_off as u32;
    }
}
/// Adjusts `runs` for `inserted_bytes` bytes of text inserted at
/// `byte_offset`.
///
/// * A run starting at or after the insertion point moves right as a whole.
/// * A run starting before it and ending at or after it grows by the inserted
///   length (so text inserted at the very end of a run joins that run).
/// * A run ending before the insertion point is unchanged.
///
/// Inserting zero bytes is a no-op.
pub fn shift_runs_for_insert(runs: &mut [FormatRun], byte_offset: u32, inserted_bytes: u32) {
    if inserted_bytes == 0 {
        return;
    }
    for run in runs.iter_mut() {
        let starts_at_or_after = run.byte_start >= byte_offset;
        if !starts_at_or_after && run.byte_end < byte_offset {
            // Entirely before the insertion point.
            continue;
        }
        if starts_at_or_after {
            run.byte_start += inserted_bytes;
        }
        run.byte_end += inserted_bytes;
    }
}
/// Adjusts `runs` for the removal of the text bytes `byte_start..byte_end`.
///
/// Formatting inside the removed range is cut out, everything after it is
/// moved left by the removed length, and the list is coalesced so that runs
/// which became neighbours with equal formats are merged. An empty or
/// reversed range is a no-op.
pub fn shift_runs_for_delete(runs: &mut Vec<FormatRun>, byte_start: u32, byte_end: u32) {
    if byte_start >= byte_end {
        return;
    }
    let removed_len = byte_end - byte_start;

    // 1. Drop the formatting that covered the deleted bytes.
    splice_range(runs, byte_start..byte_end, Vec::new());
    // 2. Close the gap: after the splice every later run starts at or past
    //    `byte_end`.
    shift_after(runs, byte_end, -(removed_len as i32));
    // 3. The two sides of the gap may now touch and share a format.
    coalesce_in_place(runs);
}
/// Adjusts image anchors for `inserted_bytes` bytes of text inserted at
/// `byte_offset`: every anchor at or after the insertion point moves right by
/// the inserted length. Inserting zero bytes is a no-op.
pub fn shift_images_for_insert(images: &mut [ImageAnchor], byte_offset: u32, inserted_bytes: u32) {
    if inserted_bytes == 0 {
        return;
    }
    images
        .iter_mut()
        .filter(|img| img.byte_offset >= byte_offset)
        .for_each(|img| img.byte_offset += inserted_bytes);
}
/// Adjusts image anchors for the removal of the text bytes
/// `byte_start..byte_end`.
///
/// Anchors inside the removed range are dropped; anchors at or past
/// `byte_end` move left by the removed length. Returns the number of anchors
/// that were dropped. An empty or reversed range is a no-op returning `0`.
pub fn shift_images_for_delete(
    images: &mut Vec<ImageAnchor>,
    byte_start: u32,
    byte_end: u32,
) -> usize {
    if byte_start >= byte_end {
        return 0;
    }
    let doomed = byte_start..byte_end;
    let original_len = images.len();
    images.retain(|img| !doomed.contains(&img.byte_offset));

    let delta = (byte_end - byte_start) as i32;
    shift_images_after(images, byte_end, -delta);

    original_len - images.len()
}
/// Converts a logical offset into a byte offset in `plain_text`.
///
/// A logical offset counts one position per character of `plain_text` and one
/// position per image anchor; an anchor at byte `b` is counted immediately
/// before the character that starts at `b`. `images` is consumed front to
/// back, so it is expected to be ordered by `byte_offset`.
///
/// Offsets of zero or less map to `0`; offsets past the last counted position
/// map to `plain_text.len()`.
pub fn logical_offset_to_byte(plain_text: &str, images: &[ImageAnchor], char_offset: i64) -> u32 {
    if char_offset <= 0 {
        return 0;
    }

    let mut position: i64 = 0;
    let mut pending = images.iter().peekable();

    for (byte_idx, _) in plain_text.char_indices() {
        let here = byte_idx as u32;

        // Anchors sitting at or before this character come first.
        while pending.next_if(|img| img.byte_offset <= here).is_some() {
            if position == char_offset {
                return here;
            }
            position += 1;
        }

        // Then the character itself.
        if position == char_offset {
            return here;
        }
        position += 1;
    }

    // Whatever is left (anchors at or past the end of the text, or an offset
    // beyond everything) resolves to the end of the text.
    plain_text.len() as u32
}
/// Splits a run list at `byte_offset` into `(left, right)`.
///
/// `left` keeps the original offsets and holds everything before the split
/// point. `right` holds everything from the split point on, with offsets
/// rebased so that `byte_offset` becomes `0`. A run straddling the split
/// point contributes one piece to each side, both with the run's format.
pub fn split_runs_at(runs: &[FormatRun], byte_offset: u32) -> (Vec<FormatRun>, Vec<FormatRun>) {
    let mut left = Vec::new();
    let mut right = Vec::new();

    for run in runs {
        let (start, end) = (run.byte_start, run.byte_end);

        if end <= byte_offset {
            left.push(run.clone());
            continue;
        }

        // From here on the run reaches past the split point.
        if start < byte_offset {
            left.push(FormatRun {
                byte_start: start,
                byte_end: byte_offset,
                format: run.format.clone(),
            });
        }
        right.push(FormatRun {
            // Zero for a straddling run, `start - byte_offset` otherwise.
            byte_start: start.saturating_sub(byte_offset),
            byte_end: end - byte_offset,
            format: run.format.clone(),
        });
    }

    (left, right)
}
/// Splits image anchors at `byte_offset` into `(left, right)`.
///
/// Anchors strictly before the split point go to `left` unchanged. Anchors at
/// or after it go to `right` with their offset rebased so that `byte_offset`
/// becomes `0`. Relative order is preserved on both sides.
pub fn split_images_at(
    images: &[ImageAnchor],
    byte_offset: u32,
) -> (Vec<ImageAnchor>, Vec<ImageAnchor>) {
    let (left, mut right): (Vec<ImageAnchor>, Vec<ImageAnchor>) = images
        .iter()
        .cloned()
        .partition(|img| img.byte_offset < byte_offset);

    for img in &mut right {
        img.byte_offset -= byte_offset;
    }

    (left, right)
}
/// Builds a `CharacterFormat` from the `fmt_*` fields of `seg`.
///
/// Every formatting field is copied one-to-one; the segment's `content` is
/// not consulted.
pub fn character_format_from_segment(seg: &InlineSegment) -> CharacterFormat {
    CharacterFormat {
        // Font selection and weight.
        font_family: seg.fmt_font_family.clone(),
        font_point_size: seg.fmt_font_point_size,
        font_weight: seg.fmt_font_weight,
        font_bold: seg.fmt_font_bold,
        font_italic: seg.fmt_font_italic,

        // Line decorations.
        font_underline: seg.fmt_font_underline,
        underline_style: seg.fmt_underline_style.clone(),
        font_overline: seg.fmt_font_overline,
        font_strikeout: seg.fmt_font_strikeout,

        // Spacing and vertical placement.
        letter_spacing: seg.fmt_letter_spacing,
        word_spacing: seg.fmt_word_spacing,
        vertical_alignment: seg.fmt_vertical_alignment.clone(),

        // Anchor and tooltip data.
        is_anchor: seg.fmt_is_anchor,
        anchor_href: seg.fmt_anchor_href.clone(),
        anchor_names: seg.fmt_anchor_names.clone(),
        tooltip: seg.fmt_tooltip.clone(),
    }
}
/// Overwrites every `fmt_*` field of `seg` with the corresponding field of
/// `fmt`.
///
/// The segment's `content` is left untouched. Fields that are unset in `fmt`
/// become unset on the segment as well — nothing is merged.
pub fn apply_character_format_to_segment(seg: &mut InlineSegment, fmt: &CharacterFormat) {
    // Font selection and weight.
    seg.fmt_font_family.clone_from(&fmt.font_family);
    seg.fmt_font_point_size = fmt.font_point_size;
    seg.fmt_font_weight = fmt.font_weight;
    seg.fmt_font_bold = fmt.font_bold;
    seg.fmt_font_italic = fmt.font_italic;

    // Line decorations.
    seg.fmt_font_underline = fmt.font_underline;
    seg.fmt_underline_style.clone_from(&fmt.underline_style);
    seg.fmt_font_overline = fmt.font_overline;
    seg.fmt_font_strikeout = fmt.font_strikeout;

    // Spacing and vertical placement.
    seg.fmt_letter_spacing = fmt.letter_spacing;
    seg.fmt_word_spacing = fmt.word_spacing;
    seg.fmt_vertical_alignment.clone_from(&fmt.vertical_alignment);

    // Anchor and tooltip data.
    seg.fmt_is_anchor = fmt.is_anchor;
    seg.fmt_anchor_href.clone_from(&fmt.anchor_href);
    seg.fmt_anchor_names.clone_from(&fmt.anchor_names);
    seg.fmt_tooltip.clone_from(&fmt.tooltip);
}
/// Rebuilds the list of inline segments for a block from its plain text,
/// format runs and image anchors.
///
/// The text is walked front to back:
/// * bytes covered by a run become a text segment carrying that run's format;
/// * bytes not covered by any run become a text segment with the default
///   format;
/// * every image anchor becomes an image segment placed at its `byte_offset`,
///   i.e. immediately before the character that starts at that byte. A text
///   span that contains an anchor is split around it, and both pieces keep
///   the span's format.
///
/// Each byte of `plain_text` is emitted exactly once. Anchors whose offset
/// lies at or beyond the end of the text are appended after the last text
/// segment.
///
/// `runs` and `images` are expected to be sorted by byte offset.
///
/// # Panics
///
/// Panics if a run extends past the end of `plain_text`, or if a run or
/// anchor boundary does not fall on a UTF-8 character boundary.
pub fn inline_segments_view(
    plain_text: &str,
    runs: &[FormatRun],
    images: &[ImageAnchor],
) -> Vec<InlineSegment> {
    /// Appends `bytes[start..end]` as a text segment with `fmt`; empty or
    /// reversed spans are skipped.
    fn push_text(
        out: &mut Vec<InlineSegment>,
        bytes: &[u8],
        start: u32,
        end: u32,
        fmt: &CharacterFormat,
    ) {
        if start >= end {
            return;
        }
        let text = std::str::from_utf8(&bytes[start as usize..end as usize])
            .expect("block plain_text must be valid UTF-8")
            .to_string();
        let mut seg = InlineSegment {
            content: InlineContent::Text(text),
            ..Default::default()
        };
        apply_character_format_to_segment(&mut seg, fmt);
        out.push(seg);
    }

    /// Appends an image segment built from `anchor`.
    fn push_image(out: &mut Vec<InlineSegment>, anchor: &ImageAnchor) {
        let mut seg = InlineSegment {
            content: InlineContent::Image {
                name: anchor.name.clone(),
                width: anchor.width,
                height: anchor.height,
                quality: anchor.quality,
            },
            ..Default::default()
        };
        apply_character_format_to_segment(&mut seg, &anchor.format);
        out.push(seg);
    }

    let bytes = plain_text.as_bytes();
    let default_fmt = CharacterFormat::default();
    let mut out: Vec<InlineSegment> = Vec::with_capacity(runs.len() + images.len());
    let mut next_image = 0usize;

    // Emits the text span `start..end` with `fmt`, interleaving every pending
    // anchor located before `end`. An anchor sitting exactly at `end` is left
    // for the following span so that it precedes the text starting there.
    let mut emit_span = |start: u32, end: u32, fmt: &CharacterFormat| {
        let mut pos = start;
        while let Some(img) = images.get(next_image) {
            if img.byte_offset >= end {
                break;
            }
            // Only ever move forward: a stale anchor (offset before `pos`)
            // is emitted on the spot without re-emitting any text.
            if img.byte_offset > pos {
                push_text(&mut out, bytes, pos, img.byte_offset, fmt);
                pos = img.byte_offset;
            }
            push_image(&mut out, img);
            next_image += 1;
        }
        push_text(&mut out, bytes, pos, end, fmt);
    };

    let mut cursor: u32 = 0;
    for run in runs {
        // Unformatted gap in front of the run, then the run itself.
        emit_span(cursor, run.byte_start, &default_fmt);
        emit_span(run.byte_start, run.byte_end, &run.format);
        cursor = run.byte_end;
    }
    // Unformatted text after the last run.
    emit_span(cursor, bytes.len() as u32, &default_fmt);

    // Anchors at (or past) the very end of the text.
    for img in &images[next_image..] {
        push_image(&mut out, img);
    }

    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a run over `s..e` whose only set attribute is `font_bold`.
    fn run(s: u32, e: u32, bold: bool) -> FormatRun {
        let format = CharacterFormat {
            font_bold: Some(bold),
            ..Default::default()
        };
        FormatRun {
            byte_start: s,
            byte_end: e,
            format,
        }
    }

    #[test]
    fn empty_runs_are_well_formed() {
        for text_len in [0usize, 100] {
            debug_assert_well_formed(&[], text_len);
        }
    }

    #[test]
    fn coalesce_merges_adjacent_equal_runs() {
        let mut runs = vec![run(0, 5, true), run(5, 10, true), run(10, 15, false)];
        coalesce_in_place(&mut runs);
        assert_eq!(runs.len(), 2);
        assert_eq!(runs[0].byte_end, 10);
    }

    #[test]
    fn coalesce_leaves_disjoint_runs_alone() {
        // Same format, but separated by a two-byte gap.
        let mut runs = vec![run(0, 5, true), run(7, 10, true)];
        coalesce_in_place(&mut runs);
        assert_eq!(runs.len(), 2);
    }

    #[test]
    fn splice_range_clips_straddling_runs() {
        let mut runs = vec![run(0, 20, true)];
        splice_range(&mut runs, 5..15, vec![run(5, 15, false)]);
        assert_eq!(runs.len(), 3);
        assert_eq!(runs[0].byte_end, 5);
        assert_eq!(runs[1].format.font_bold, Some(false));
        assert_eq!(runs[2].byte_start, 15);
    }

    #[test]
    fn splice_range_empty_replacement_removes_inner_runs() {
        let mut runs = vec![run(0, 5, true), run(5, 10, false), run(10, 15, true)];
        splice_range(&mut runs, 5..10, Vec::new());
        assert_eq!(runs.len(), 2);
        assert_eq!(runs[0].byte_end, 5);
        assert_eq!(runs[1].byte_start, 10);
    }

    #[test]
    fn shift_after_moves_downstream() {
        let mut runs = vec![run(0, 5, true), run(10, 15, false)];
        shift_after(&mut runs, 5, 3);
        // The first run starts before the threshold and must stay put.
        assert_eq!(runs[0].byte_start, 0);
        assert_eq!(runs[1].byte_start, 13);
        assert_eq!(runs[1].byte_end, 18);
    }
}