use serde::{Deserialize, Serialize};
/// Alignment options for text.
///
/// Because of `rename_all = "lowercase"`, variants are serialized as the
/// lowercase strings `"left"`, `"center"`, `"right"` and `"justify"`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum TextAlignment {
/// Left alignment; this is the `Default` value.
#[default]
Left,
/// Centered text.
Center,
/// Right alignment.
Right,
/// Justified text.
Justify,
}
/// Heading level, with `None` standing for "not a heading".
///
/// Unlike the other enums in this file there is no `rename_all` attribute
/// here, so variants are serialized under their Rust names (`"None"`,
/// `"H1"`, ...).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum HeadingLevel {
/// Not a heading; this is the `Default` value.
#[default]
None,
/// Level-1 heading.
H1,
/// Level-2 heading.
H2,
/// Level-3 heading.
H3,
/// Level-4 heading.
H4,
/// Level-5 heading.
H5,
/// Level-6 heading.
H6,
}
impl HeadingLevel {
    /// Maps a numeric level to its heading variant.
    ///
    /// `1..=6` yield `H1`..`H6`; every other value (including `0`) yields
    /// `HeadingLevel::None`.
    pub fn from_number(n: u8) -> Self {
        // Lookup table indexed by `n - 1`.
        const LEVELS: [HeadingLevel; 6] = [
            HeadingLevel::H1,
            HeadingLevel::H2,
            HeadingLevel::H3,
            HeadingLevel::H4,
            HeadingLevel::H5,
            HeadingLevel::H6,
        ];
        // `checked_sub` rejects 0, `get` rejects anything above 6.
        n.checked_sub(1)
            .and_then(|index| LEVELS.get(usize::from(index)).copied())
            .unwrap_or(HeadingLevel::None)
    }

    /// Returns the numeric level: `0` for `None`, `1..=6` for `H1`..`H6`.
    pub fn level(&self) -> u8 {
        match *self {
            Self::None => 0,
            Self::H1 => 1,
            Self::H2 => 2,
            Self::H3 => 3,
            Self::H4 => 4,
            Self::H5 => 5,
            Self::H6 => 6,
        }
    }

    /// Returns `true` for every variant except `None`.
    pub fn is_heading(&self) -> bool {
        *self != Self::None
    }
}
/// Kind of list, with `None` standing for "no list".
///
/// Serialized as the lowercase strings `"none"`, `"bullet"` and `"numbered"`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ListType {
/// No list; this is the `Default` value.
#[default]
None,
/// Bulleted list.
Bullet,
/// Numbered list.
Numbered,
}
/// Revision marker carried by a [`TextRun`].
///
/// Serialized as the lowercase strings `"none"`, `"inserted"` and `"deleted"`.
// NOTE(review): presumably mirrors tracked-change markup from the source
// document; the producer is not visible in this file.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum RevisionType {
/// No revision marker; this is the `Default` value.
#[default]
None,
/// The text is marked as inserted.
Inserted,
/// The text is marked as deleted.
Deleted,
}
/// List membership data stored on a [`Paragraph`] via its `list_info` field.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ListInfo {
/// Kind of list.
pub list_type: ListType,
/// List level.
// NOTE(review): presumably the nesting depth; whether it is zero- or
// one-based is not established in this file.
pub level: u8,
/// Optional number associated with the item; omitted from serialized output
/// when `None`.
// NOTE(review): presumably the item's ordinal in a numbered list; confirm
// against the producer.
#[serde(skip_serializing_if = "Option::is_none")]
pub number: Option<u32>,
}
/// Character-level formatting of a [`TextRun`].
///
/// Serialization is sparse: boolean flags are written only when `true`
/// (`std::ops::Not::not` applied to `&bool` yields `true` for `false`, which
/// tells serde to skip the field) and optional attributes only when `Some`.
/// Missing boolean flags deserialize to `false`.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct TextStyle {
/// Bold flag.
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub bold: bool,
/// Italic flag.
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub italic: bool,
/// Underline flag.
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub underline: bool,
/// Strikethrough flag.
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub strikethrough: bool,
/// Superscript flag.
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub superscript: bool,
/// Subscript flag.
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub subscript: bool,
/// Code (monospace-style) flag.
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub code: bool,
/// Font, if specified.
// NOTE(review): presumably a font family name; confirm against the producer.
#[serde(skip_serializing_if = "Option::is_none")]
pub font: Option<String>,
/// Font size, if specified.
// NOTE(review): the unit (points, half-points, ...) is not established in
// this file.
#[serde(skip_serializing_if = "Option::is_none")]
pub size: Option<u32>,
/// Text color, if specified.
// NOTE(review): string format (hex, named color, ...) is not established here.
#[serde(skip_serializing_if = "Option::is_none")]
pub color: Option<String>,
/// Highlight color, if specified.
// NOTE(review): string format is not established here.
#[serde(skip_serializing_if = "Option::is_none")]
pub highlight: Option<String>,
}
impl TextStyle {
    /// Returns the default style: every flag off, every optional attribute unset.
    pub fn new() -> Self {
        Default::default()
    }

    /// Returns a style that differs from the default only by `bold = true`.
    pub fn bold() -> Self {
        Self {
            bold: true,
            ..Self::new()
        }
    }

    /// Returns a style that differs from the default only by `italic = true`.
    pub fn italic() -> Self {
        Self {
            italic: true,
            ..Self::new()
        }
    }

    /// Reports whether any of the boolean formatting flags is set.
    ///
    /// Only the seven flags are inspected; `font`, `size`, `color` and
    /// `highlight` do not influence the result.
    pub fn has_formatting(&self) -> bool {
        let flags = [
            self.bold,
            self.italic,
            self.underline,
            self.strikethrough,
            self.superscript,
            self.subscript,
            self.code,
        ];
        flags.iter().any(|&flag| flag)
    }
}
/// A span of text sharing one style, optional hyperlink and revision marker.
///
/// Apart from `text`, fields are omitted from serialized output when they hold
/// their default value.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TextRun {
/// The run's text content.
pub text: String,
/// Formatting of the run; skipped on serialization when equal to
/// `TextStyle::default()` (see `is_default_style`).
#[serde(default, skip_serializing_if = "is_default_style")]
pub style: TextStyle,
/// Hyperlink target, if the run is a link.
#[serde(skip_serializing_if = "Option::is_none")]
pub hyperlink: Option<String>,
/// Line-break flag; `Paragraph::plain_text` emits `"\n"` after the text of a
/// run with this flag set.
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub line_break: bool,
/// Page-break flag; `Paragraph::plain_text` emits `"\n---\n"` after the text
/// of a run with this flag set.
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub page_break: bool,
/// Revision marker; skipped on serialization when `RevisionType::None`
/// (see `is_default_revision`).
#[serde(default, skip_serializing_if = "is_default_revision")]
pub revision: RevisionType,
}
/// Serde `skip_serializing_if` predicate: `true` when `style` equals the
/// default `TextStyle`, so the field can be left out of the output.
fn is_default_style(style: &TextStyle) -> bool {
    let baseline = TextStyle::default();
    style == &baseline
}
/// Serde `skip_serializing_if` predicate: `true` when `revision` is
/// `RevisionType::None`, so the field can be left out of the output.
fn is_default_revision(revision: &RevisionType) -> bool {
    matches!(revision, RevisionType::None)
}
impl TextRun {
    /// Creates an unstyled run: default style, no hyperlink, no breaks, no
    /// revision marker.
    pub fn plain(text: impl Into<String>) -> Self {
        Self::styled(text, TextStyle::default())
    }

    /// Creates a run with the given `style`; every other attribute stays at
    /// its default (no hyperlink, no breaks, `RevisionType::None`).
    pub fn styled(text: impl Into<String>, style: TextStyle) -> Self {
        Self {
            text: text.into(),
            style,
            ..Self::default()
        }
    }

    /// Creates an unstyled run whose hyperlink target is `url`.
    pub fn link(text: impl Into<String>, url: impl Into<String>) -> Self {
        let mut run = Self::plain(text);
        run.hyperlink = Some(url.into());
        run
    }

    /// Returns `true` when the run carries a hyperlink target.
    pub fn is_link(&self) -> bool {
        self.hyperlink.is_some()
    }

    /// Returns `true` when the run's text is the empty string; break flags
    /// are not taken into account.
    pub fn is_empty(&self) -> bool {
        self.text.is_empty()
    }
}
/// An image placed inside a paragraph, referenced by a resource identifier.
///
/// Optional fields are omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InlineImage {
/// Identifier of the image resource.
// NOTE(review): how the identifier is resolved to image data is not
// visible in this file.
pub resource_id: String,
/// Alternative text, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub alt_text: Option<String>,
/// Image width, if known.
// NOTE(review): unit (pixels, EMU, ...) is not established in this file.
#[serde(skip_serializing_if = "Option::is_none")]
pub width: Option<u32>,
/// Image height, if known.
// NOTE(review): unit (pixels, EMU, ...) is not established in this file.
#[serde(skip_serializing_if = "Option::is_none")]
pub height: Option<u32>,
}
/// Either a text run or an inline image.
///
/// `#[serde(untagged)]` means no variant tag is written; on deserialization
/// serde tries the variants in declaration order and keeps the first that
/// matches, so `Text` is attempted before `Image`.
// NOTE(review): `Paragraph` in this file stores runs and images in separate
// vectors and does not use this enum; its consumers are elsewhere.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ParagraphElement {
/// A run of text.
Text(TextRun),
/// An inline image.
Image(InlineImage),
}
/// A paragraph: an ordered list of text runs plus paragraph-level attributes.
///
/// `runs` is always serialized; every other field is omitted when it holds
/// its default value (empty, `None`, `Left`, or `0`).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Paragraph {
/// Text runs in document order; a missing field deserializes to an empty list.
#[serde(default)]
pub runs: Vec<TextRun>,
/// Inline images attached to the paragraph; skipped when empty.
// NOTE(review): the position of an image relative to the runs is not
// represented by this struct.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub images: Vec<InlineImage>,
/// Heading level; skipped on serialization when `HeadingLevel::None`.
#[serde(default, skip_serializing_if = "HeadingLevel::is_none")]
pub heading: HeadingLevel,
/// Text alignment; skipped on serialization when `TextAlignment::Left`
/// (see `is_default_alignment`).
#[serde(default, skip_serializing_if = "is_default_alignment")]
pub alignment: TextAlignment,
/// List membership data; `Some` marks the paragraph as a list item
/// (see `Paragraph::is_list_item`).
#[serde(skip_serializing_if = "Option::is_none")]
pub list_info: Option<ListInfo>,
/// Identifier of the paragraph style, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub style_id: Option<String>,
/// Name of the paragraph style, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub style_name: Option<String>,
/// Indentation level; skipped on serialization when `0` (see `is_zero`).
#[serde(default, skip_serializing_if = "is_zero")]
pub indent_level: u8,
}
/// Serde `skip_serializing_if` predicate: `true` when the alignment is
/// `TextAlignment::Left`, so the field can be left out of the output.
fn is_default_alignment(a: &TextAlignment) -> bool {
    matches!(a, TextAlignment::Left)
}
/// Serde `skip_serializing_if` predicate: `true` when the value is `0`.
///
/// Takes a reference because serde passes the field by reference.
fn is_zero(n: &u8) -> bool {
    matches!(*n, 0)
}
impl HeadingLevel {
    /// Serde `skip_serializing_if` predicate for `Paragraph::heading`:
    /// `true` only for `HeadingLevel::None`.
    fn is_none(&self) -> bool {
        *self == HeadingLevel::None
    }
}
impl Paragraph {
    /// Creates an empty paragraph with every field at its default value.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a paragraph containing a single unstyled run with `text`.
    pub fn with_text(text: impl Into<String>) -> Self {
        Self {
            runs: vec![TextRun::plain(text)],
            ..Default::default()
        }
    }

    /// Creates a paragraph with heading level `level` containing a single
    /// unstyled run with `text`.
    pub fn heading(level: HeadingLevel, text: impl Into<String>) -> Self {
        Self {
            runs: vec![TextRun::plain(text)],
            heading: level,
            ..Default::default()
        }
    }

    /// Appends `run` to the end of the paragraph.
    pub fn add_run(&mut self, run: TextRun) {
        self.runs.push(run);
    }

    /// Concatenates the text of all runs into one string.
    ///
    /// After the text of a run flagged `line_break` a `"\n"` is emitted, and
    /// after one flagged `page_break` a `"\n---\n"` marker (line break first
    /// when both are set). Styles, hyperlinks, revisions and images are not
    /// reflected in the output.
    pub fn plain_text(&self) -> String {
        let mut text = String::new();
        for run in &self.runs {
            text.push_str(&run.text);
            if run.line_break {
                text.push('\n');
            }
            if run.page_break {
                text.push_str("\n---\n");
            }
        }
        text
    }

    /// Returns `true` when the paragraph has no runs or every run has empty
    /// text.
    ///
    /// Only run text is inspected: inline images and break flags on
    /// empty-text runs do not make a paragraph non-empty.
    pub fn is_empty(&self) -> bool {
        // `all` is vacuously true for an empty run list, which covers the
        // "no runs" case without a separate check.
        self.runs.iter().all(|run| run.is_empty())
    }

    /// Returns `true` when the paragraph's heading level is not `None`.
    pub fn is_heading(&self) -> bool {
        self.heading.is_heading()
    }

    /// Returns `true` when the paragraph carries list information.
    pub fn is_list_item(&self) -> bool {
        self.list_info.is_some()
    }

    /// Merges neighbouring runs that are indistinguishable apart from their
    /// text, in place.
    ///
    /// A run is appended to the previous (already merged) run when both share
    /// the same style, hyperlink and revision marker and the previous run does
    /// not end in a line or page break. Break flags of the appended run are
    /// carried over to the merged run, which then blocks further merging.
    /// Texts are concatenated verbatim.
    pub fn merge_adjacent_runs(&mut self) {
        if self.runs.len() <= 1 {
            return;
        }
        let mut merged: Vec<TextRun> = Vec::with_capacity(self.runs.len());
        for run in self.runs.drain(..) {
            let mergeable = merged
                .last()
                .is_some_and(|last: &TextRun| Self::can_merge(last, &run));
            if !mergeable {
                merged.push(run);
                continue;
            }
            if let Some(last) = merged.last_mut() {
                if Self::needs_space_between(&last.text, &run.text) {
                    last.text.push(' ');
                }
                last.text.push_str(&run.text);
                // A break on the appended run now terminates the merged run.
                last.line_break |= run.line_break;
                last.page_break |= run.page_break;
            }
        }
        self.runs = merged;
    }

    /// Decides whether `next` may be appended to `last`.
    ///
    /// The revision marker is part of the comparison: folding an inserted or
    /// deleted run into a neighbour with a different marker would silently
    /// drop that marker, because the merged run keeps `last`'s attributes.
    fn can_merge(last: &TextRun, next: &TextRun) -> bool {
        last.style == next.style
            && last.hyperlink == next.hyperlink
            && last.revision == next.revision
            && !last.line_break
            && !last.page_break
    }

    /// Decides whether a space has to be inserted between two run texts that
    /// are being merged; currently this is never the case.
    ///
    /// A run boundary carries no implicit whitespace: a single word may be
    /// split across runs (`"DRB"` + `"D"`), and CJK text has no inter-word
    /// spaces, so inserting a separator would corrupt the text. The function
    /// is kept as the single place to change should that policy ever differ.
    fn needs_space_between(_prev: &str, _next: &str) -> bool {
        false
    }

    /// Returns a clone of this paragraph with adjacent runs merged; `self` is
    /// left untouched. See [`Paragraph::merge_adjacent_runs`] for the rules.
    pub fn with_merged_runs(&self) -> Self {
        let mut para = self.clone();
        para.merge_adjacent_runs();
        para
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a paragraph from two unstyled runs and merges them.
    fn merged_pair(first: &str, second: &str) -> Paragraph {
        let mut para = Paragraph::new();
        para.runs.push(TextRun::plain(first));
        para.runs.push(TextRun::plain(second));
        para.merge_adjacent_runs();
        para
    }

    /// Asserts that two unstyled runs collapse into exactly one run whose
    /// text is `expected`.
    fn assert_merges_to(first: &str, second: &str, expected: &str) {
        let para = merged_pair(first, second);
        assert_eq!(para.runs.len(), 1);
        assert_eq!(para.runs[0].text, expected);
    }

    #[test]
    fn test_heading_level() {
        assert_eq!(HeadingLevel::from_number(1), HeadingLevel::H1);
        assert_eq!(HeadingLevel::from_number(6), HeadingLevel::H6);
        assert_eq!(HeadingLevel::from_number(7), HeadingLevel::None);
        assert_eq!(HeadingLevel::from_number(0), HeadingLevel::None);
        assert_eq!(HeadingLevel::H3.level(), 3);
        assert!(HeadingLevel::H1.is_heading());
        assert!(!HeadingLevel::None.is_heading());
    }

    #[test]
    fn test_text_run() {
        let plain = TextRun::plain("Hello");
        assert_eq!(plain.text, "Hello");
        assert!(!plain.is_link());
        let link = TextRun::link("Click here", "https://example.com");
        assert!(link.is_link());
        assert_eq!(link.hyperlink, Some("https://example.com".to_string()));
    }

    #[test]
    fn test_text_style() {
        let style = TextStyle::bold();
        assert!(style.bold);
        assert!(style.has_formatting());
        let plain = TextStyle::default();
        assert!(!plain.has_formatting());
    }

    #[test]
    fn test_paragraph() {
        let para = Paragraph::with_text("Hello, World!");
        assert_eq!(para.plain_text(), "Hello, World!");
        assert!(!para.is_heading());
        assert!(!para.is_empty());
        let heading = Paragraph::heading(HeadingLevel::H1, "Title");
        assert!(heading.is_heading());
        assert_eq!(heading.heading.level(), 1);
    }

    #[test]
    fn test_paragraph_plain_text_preserves_run_breaks() {
        let para = Paragraph {
            runs: vec![
                TextRun {
                    text: "First line".to_string(),
                    line_break: true,
                    ..Default::default()
                },
                TextRun {
                    text: "Second line".to_string(),
                    page_break: true,
                    ..Default::default()
                },
                TextRun::plain("Third line"),
            ],
            ..Default::default()
        };
        assert_eq!(
            para.plain_text(),
            "First line\nSecond line\n---\nThird line"
        );
    }

    #[test]
    fn test_paragraph_serialization() {
        let para = Paragraph::with_text("Test");
        // Default-valued fields must be left out of the JSON output.
        let json = serde_json::to_string(&para).unwrap();
        assert!(!json.contains("heading"));
        assert!(!json.contains("alignment"));
    }

    #[test]
    fn test_merge_adjacent_runs_ascii_no_split() {
        assert_merges_to("DRB", "D", "DRBD");
    }

    #[test]
    fn test_merge_adjacent_runs_ping() {
        assert_merges_to("P", "ING", "PING");
    }

    #[test]
    fn test_merge_adjacent_runs_tcp() {
        assert_merges_to("T", "CP", "TCP");
    }

    #[test]
    fn test_merge_adjacent_runs_cjk_ascii_no_space() {
        assert_merges_to("리소스", "DRBD", "리소스DRBD");
    }

    #[test]
    fn test_merge_adjacent_runs_ascii_cjk_no_space() {
        assert_merges_to("CJ", "대한통운", "CJ대한통운");
    }

    #[test]
    fn test_merge_adjacent_runs_korean_no_space() {
        assert_merges_to("네트워크", "카드", "네트워크카드");
    }

    #[test]
    fn test_merge_adjacent_runs_korean_syllables() {
        assert_merges_to("정", "의", "정의");
    }

    #[test]
    fn test_merge_adjacent_runs_korean_with_explicit_space() {
        assert_merges_to("서버 ", "리부팅", "서버 리부팅");
    }

    #[test]
    fn test_merge_adjacent_runs_chinese_no_space() {
        assert_merges_to("中文", "测试", "中文测试");
    }

    #[test]
    fn test_merge_adjacent_runs_japanese_no_space() {
        assert_merges_to("日本語", "テスト", "日本語テスト");
    }

    #[test]
    fn test_merge_adjacent_runs_different_styles_not_merged() {
        let mut para = Paragraph::new();
        para.runs.push(TextRun::plain("normal"));
        para.runs.push(TextRun::styled("bold", TextStyle::bold()));
        para.merge_adjacent_runs();
        assert_eq!(para.runs.len(), 2);
        assert_eq!(para.runs[0].text, "normal");
        assert_eq!(para.runs[1].text, "bold");
    }

    #[test]
    fn test_merge_preserves_existing_spaces() {
        assert_merges_to("Hello ", "World", "Hello World");
    }

    #[test]
    fn test_merge_adjacent_runs_preserves_page_break() {
        let mut para = Paragraph {
            runs: vec![
                TextRun::plain("Before"),
                TextRun {
                    text: "After".to_string(),
                    page_break: true,
                    ..Default::default()
                },
            ],
            ..Default::default()
        };
        para.merge_adjacent_runs();
        assert!(
            para.runs.iter().any(|r| r.page_break),
            "page_break lost after merge: runs = {:?}",
            para.runs
        );
    }

    #[test]
    fn test_merge_adjacent_runs_blocks_on_last_page_break() {
        let mut para = Paragraph {
            runs: vec![
                TextRun {
                    text: "Before".to_string(),
                    page_break: true,
                    ..Default::default()
                },
                TextRun::plain("After"),
            ],
            ..Default::default()
        };
        para.merge_adjacent_runs();
        assert_eq!(para.runs.len(), 2, "must not merge across a page_break");
        assert!(para.runs[0].page_break);
        assert!(!para.runs[1].page_break);
    }
}