use crate::ir;
/// Mutable state shared by the parsing helpers in this file.
///
/// It records which containers are open, the formatting of the current run,
/// and the text / inline / block buffers that the `flush_*` functions drain.
pub(crate) struct ParseContext {
// Open-element flags. Only `in_cell` is read in this part of the file (by
// the routing helpers); the others are presumably maintained by the caller's
// event loop — confirm there.
pub(crate) in_paragraph: bool,
pub(crate) in_run: bool,
pub(crate) in_text: bool,
pub(crate) in_table: bool,
pub(crate) in_cell: bool,
/// Pending text of the top-level paragraph (used when no footnote, list
/// item or cell is open).
pub(crate) current_text: String,
/// Finished inlines of the top-level paragraph.
pub(crate) current_inlines: Vec<ir::Inline>,
// Run formatting, written by `apply_charpr_attrs` and attached to pending
// text when it is turned into an inline.
pub(crate) current_bold: bool,
pub(crate) current_italic: bool,
pub(crate) current_underline: bool,
pub(crate) current_strike: bool,
pub(crate) current_superscript: bool,
pub(crate) current_subscript: bool,
/// `#` followed by the upper-cased colour value; `None` when the value is
/// empty or black (`000000`).
pub(crate) current_color: Option<String>,
pub(crate) current_font_name: Option<String>,
/// Font names; `apply_charpr_attrs` resolves a face ID by indexing into
/// this list.
pub(crate) face_names: Vec<String>,
/// When `Some`, a flushed top-level paragraph becomes a heading of this
/// level.
pub(crate) heading_level: Option<u8>,
// Table assembly state. Only the `cell_*` buffers are touched in this part
// of the file; rows, cells and spans are presumably handled by the caller.
pub(crate) table_rows: Vec<ir::TableRow>,
pub(crate) current_row_cells: Vec<ir::TableCell>,
pub(crate) cell_blocks: Vec<ir::Block>,
pub(crate) cell_inlines: Vec<ir::Inline>,
pub(crate) cell_text: String,
pub(crate) col_count: usize,
// Both spans default to 1 (see the `Default` impl).
pub(crate) current_colspan: u32,
pub(crate) current_rowspan: u32,
// Explicit list state. `in_list_item` takes priority over `in_cell` in the
// routing helpers.
pub(crate) list_ordered: bool,
pub(crate) in_list: bool,
pub(crate) list_items: Vec<ir::ListItem>,
pub(crate) in_list_item: bool,
pub(crate) list_item_blocks: Vec<ir::Block>,
pub(crate) list_item_inlines: Vec<ir::Inline>,
pub(crate) list_item_text: String,
// Equation state; not read in this part of the file.
pub(crate) equation_text: String,
pub(crate) in_equation: bool,
// Footnote state. `in_footnote` has the highest priority in the routing
// helpers.
pub(crate) in_footnote: bool,
pub(crate) footnote_id: String,
pub(crate) footnote_blocks: Vec<ir::Block>,
pub(crate) footnote_inlines: Vec<ir::Inline>,
pub(crate) footnote_text: String,
// Hyperlink and ruby state; not read in this part of the file.
pub(crate) in_hyperlink: bool,
pub(crate) hyperlink_url: Option<String>,
pub(crate) in_ruby: bool,
pub(crate) ruby_base_text: String,
pub(crate) ruby_annotation_text: String,
pub(crate) ruby_current_part: RubyPart,
/// Paragraph-property ID of the current paragraph; consumed by
/// `flush_paragraph_staged` to derive a list depth.
pub(crate) current_para_pr_id: Option<String>,
/// Numbering ID of the current paragraph; `flush_paragraph_staged` treats
/// the value `"1"` as an ordered list.
pub(crate) current_num_pr_id: Option<String>,
/// Outer `Some` marks the next flushed paragraph as a code block; the inner
/// option is its language.
pub(crate) pending_code_lang: Option<Option<String>>,
// Page layout values copied into `ir::PageLayout` by `take_page_layout`.
pub(crate) page_layout_landscape: bool,
pub(crate) page_layout_width: Option<u32>,
pub(crate) page_layout_height: Option<u32>,
pub(crate) page_layout_margin_left: Option<u32>,
pub(crate) page_layout_margin_right: Option<u32>,
pub(crate) page_layout_margin_top: Option<u32>,
pub(crate) page_layout_margin_bottom: Option<u32>,
/// Gate for `take_page_layout`: no layout is reported while this is false.
pub(crate) has_sec_pr: bool,
}
/// Marker stored in `ParseContext::ruby_current_part`; defaults to `None`.
// NOTE(review): presumably selects whether incoming text goes to
// `ruby_base_text` or `ruby_annotation_text` — confirm against the caller.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum RubyPart {
#[default]
None,
Base,
Annotation,
}
// A derived `Default` would set `current_colspan` and `current_rowspan` to 0;
// this impl starts both at 1 and leaves every other field empty / false / `None`.
impl Default for ParseContext {
/// Returns a context with no open elements and empty buffers.
fn default() -> Self {
Self {
in_paragraph: false,
in_run: false,
in_text: false,
in_table: false,
in_cell: false,
current_text: String::new(),
current_inlines: Vec::new(),
current_bold: false,
current_italic: false,
current_underline: false,
current_strike: false,
current_superscript: false,
current_subscript: false,
current_color: None,
current_font_name: None,
face_names: Vec::new(),
heading_level: None,
table_rows: Vec::new(),
current_row_cells: Vec::new(),
cell_blocks: Vec::new(),
cell_inlines: Vec::new(),
cell_text: String::new(),
col_count: 0,
// Spans start at 1, not 0.
current_colspan: 1,
current_rowspan: 1,
list_ordered: false,
in_list: false,
list_items: Vec::new(),
in_list_item: false,
list_item_blocks: Vec::new(),
list_item_inlines: Vec::new(),
list_item_text: String::new(),
equation_text: String::new(),
in_equation: false,
in_footnote: false,
footnote_id: String::new(),
footnote_blocks: Vec::new(),
footnote_inlines: Vec::new(),
footnote_text: String::new(),
in_hyperlink: false,
hyperlink_url: None,
in_ruby: false,
ruby_base_text: String::new(),
ruby_annotation_text: String::new(),
ruby_current_part: RubyPart::None,
current_para_pr_id: None,
current_num_pr_id: None,
pending_code_lang: None,
page_layout_landscape: false,
page_layout_width: None,
page_layout_height: None,
page_layout_margin_left: None,
page_layout_margin_right: None,
page_layout_margin_top: None,
page_layout_margin_bottom: None,
has_sec_pr: false,
}
}
}
impl ParseContext {
    /// Builds an [`ir::PageLayout`] from the recorded page fields.
    ///
    /// Returns `None` while `has_sec_pr` is false. Despite the name, the
    /// context is only read; nothing is moved out or reset.
    pub(crate) fn take_page_layout(&self) -> Option<ir::PageLayout> {
        self.has_sec_pr.then(|| ir::PageLayout {
            width: self.page_layout_width,
            height: self.page_layout_height,
            landscape: self.page_layout_landscape,
            margin_left: self.page_layout_margin_left,
            margin_right: self.page_layout_margin_right,
            margin_top: self.page_layout_margin_top,
            margin_bottom: self.page_layout_margin_bottom,
        })
    }

    /// Returns the text buffer of the innermost open scope.
    ///
    /// Priority: footnote, then list item, then table cell, then the
    /// top-level paragraph.
    pub(crate) fn active_text_buf(&mut self) -> &mut String {
        match (self.in_footnote, self.in_list_item, self.in_cell) {
            (true, _, _) => &mut self.footnote_text,
            (false, true, _) => &mut self.list_item_text,
            (false, false, true) => &mut self.cell_text,
            (false, false, false) => &mut self.current_text,
        }
    }

    /// Appends `inline` to the inline buffer of the innermost open scope,
    /// using the same priority as [`ParseContext::active_text_buf`].
    pub(crate) fn push_inline(&mut self, inline: ir::Inline) {
        let target = if self.in_footnote {
            &mut self.footnote_inlines
        } else if self.in_list_item {
            &mut self.list_item_inlines
        } else if self.in_cell {
            &mut self.cell_inlines
        } else {
            &mut self.current_inlines
        };
        target.push(inline);
    }

    /// Stores `block` in the block buffer of the open footnote, list item or
    /// cell and returns `None`.
    ///
    /// When none of those scopes is open the block is handed back as
    /// `Some(block)` so the caller can place it.
    pub(crate) fn push_block_scoped(&mut self, block: ir::Block) -> Option<ir::Block> {
        let scope = if self.in_footnote {
            Some(&mut self.footnote_blocks)
        } else if self.in_list_item {
            Some(&mut self.list_item_blocks)
        } else if self.in_cell {
            Some(&mut self.cell_blocks)
        } else {
            None
        };
        match scope {
            Some(blocks) => {
                blocks.push(block);
                None
            }
            None => Some(block),
        }
    }
}
/// Applies the character-formatting attributes of `e` to the run state in `ctx`.
///
/// Each attribute name is accepted with or without an `hp:` prefix. Attributes
/// that are absent leave the corresponding field untouched; attributes that
/// cannot be read are skipped, and a value that fails to unescape is treated
/// as empty.
///
/// * `bold` / `italic`: on only for `true` or `1`.
/// * `underline` / `strikeout`: on for any value except the empty string, `0`,
///   `none` or `false` (keywords compared ASCII case-insensitively).
/// * `supscript`: `superscript` or `subscript` selects that flag and clears the
///   other; any other value clears both.
/// * `color`: stored as `#` plus the upper-cased value; empty or black
///   (`000000`) is stored as `None`.
/// * `faceNameIDRef` / `hangulIDRef`: parsed as an index into
///   `ctx.face_names`; the last parsable one wins, and an out-of-range index
///   clears the font name.
pub(crate) fn apply_charpr_attrs(e: &quick_xml::events::BytesStart, ctx: &mut ParseContext) {
    /// Boolean attributes are on only for `true` or `1`.
    fn flag_enabled(val: &str) -> bool {
        matches!(val, "true" | "1")
    }

    /// Line-style attributes carry a style name, so any value counts as "on"
    /// unless it is empty or one of the explicit "off" spellings.
    fn line_style_enabled(val: &str) -> bool {
        !(val.is_empty()
            || val == "0"
            || val.eq_ignore_ascii_case("none")
            || val.eq_ignore_ascii_case("false"))
    }

    let mut face_id: Option<usize> = None;
    for attr in e.attributes().flatten() {
        // A non-UTF-8 key becomes "" and falls through to the ignore arm.
        let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or("");
        let name = key.strip_prefix("hp:").unwrap_or(key);
        let unescaped = attr.unescape_value().unwrap_or_default();
        let val: &str = unescaped.as_ref();
        match name {
            "bold" => ctx.current_bold = flag_enabled(val),
            "italic" => ctx.current_italic = flag_enabled(val),
            "underline" => ctx.current_underline = line_style_enabled(val),
            "strikeout" => ctx.current_strike = line_style_enabled(val),
            "supscript" => {
                ctx.current_superscript = val == "superscript";
                ctx.current_subscript = val == "subscript";
            }
            "color" => {
                let hex = val.trim_start_matches('#');
                ctx.current_color = if hex.is_empty() || hex.eq_ignore_ascii_case("000000") {
                    None
                } else {
                    Some(format!("#{}", hex.to_ascii_uppercase()))
                };
            }
            "faceNameIDRef" | "hangulIDRef" => {
                if let Ok(idx) = val.parse::<usize>() {
                    face_id = Some(idx);
                }
            }
            _ => {}
        }
    }
    // Resolve the font only when a face ID was seen, so an element without one
    // keeps the previous font name.
    if let Some(idx) = face_id {
        ctx.current_font_name = ctx.face_names.get(idx).cloned();
    }
}
/// Turns pending text and inlines into a paragraph block.
///
/// Non-empty `text` is drained into one inline carrying the given formatting
/// and appended to `inlines`. If `inlines` is then non-empty, it is drained
/// into a new `ir::Block::Paragraph` pushed onto `blocks`. All three buffers
/// are left untouched when there is nothing to flush.
// The argument list mirrors the individual formatting fields of `ParseContext`
// so callers can pass disjoint field borrows.
#[allow(clippy::too_many_arguments)]
fn flush_inlines_to_blocks(
    text: &mut String,
    inlines: &mut Vec<ir::Inline>,
    blocks: &mut Vec<ir::Block>,
    bold: bool,
    italic: bool,
    underline: bool,
    strike: bool,
    superscript: bool,
    subscript: bool,
    color: &Option<String>,
    font_name: &Option<String>,
) {
    if !text.is_empty() {
        let run = ir::Inline::with_formatting(
            std::mem::take(text),
            bold,
            italic,
            underline,
            strike,
            superscript,
            subscript,
            color.clone(),
        )
        .with_font_name(font_name.clone());
        inlines.push(run);
    }
    if inlines.is_empty() {
        return;
    }
    blocks.push(ir::Block::Paragraph {
        inlines: std::mem::take(inlines),
    });
}
/// Test-only helper: flushes the top-level paragraph straight into `section`.
///
/// Pending text becomes a final inline with the current run formatting.
/// `pending_code_lang` is always consumed. If any inlines exist they are
/// emitted as a code block (when a code language was pending), a heading
/// (when `heading_level` is set) or a plain paragraph.
#[cfg(test)]
pub(crate) fn flush_paragraph(ctx: &mut ParseContext, section: &mut ir::Section) {
    if !ctx.current_text.is_empty() {
        let run = ir::Inline::with_formatting(
            std::mem::take(&mut ctx.current_text),
            ctx.current_bold,
            ctx.current_italic,
            ctx.current_underline,
            ctx.current_strike,
            ctx.current_superscript,
            ctx.current_subscript,
            ctx.current_color.clone(),
        )
        .with_font_name(ctx.current_font_name.clone());
        ctx.current_inlines.push(run);
    }

    // Consumed before the emptiness check so the marker never leaks into a
    // later paragraph.
    let pending_lang = ctx.pending_code_lang.take();
    if ctx.current_inlines.is_empty() {
        return;
    }

    let inlines = std::mem::take(&mut ctx.current_inlines);
    let block = match (pending_lang, ctx.heading_level) {
        // A code block keeps only the concatenated text of its inlines.
        (Some(language), _) => ir::Block::CodeBlock {
            language,
            code: inlines
                .into_iter()
                .map(|inline| inline.text)
                .collect::<String>(),
        },
        (None, Some(level)) => ir::Block::Heading { level, inlines },
        (None, None) => ir::Block::Paragraph { inlines },
    };
    section.blocks.push(block);
}
/// Flushes the top-level paragraph into a [`StagedBlock`].
///
/// Pending text becomes a final inline with the current run formatting. The
/// per-paragraph markers (`current_para_pr_id`, `current_num_pr_id`,
/// `pending_code_lang`) are always consumed, even when the paragraph turns out
/// to be empty, in which case `None` is returned.
///
/// Otherwise the result is, in order of precedence:
/// * a plain code block, when a code language was pending;
/// * a plain heading, when `heading_level` is set;
/// * a `ListPara`, when the paragraph-property ID maps to a list depth
///   (`"2"` is depth 0, `"3"` and any numeric ID of 4 or more are depth 1),
///   ordered when the numbering ID is `"1"`;
/// * a plain paragraph.
pub(crate) fn flush_paragraph_staged(ctx: &mut ParseContext) -> Option<StagedBlock> {
    if !ctx.current_text.is_empty() {
        let run = ir::Inline::with_formatting(
            std::mem::take(&mut ctx.current_text),
            ctx.current_bold,
            ctx.current_italic,
            ctx.current_underline,
            ctx.current_strike,
            ctx.current_superscript,
            ctx.current_subscript,
            ctx.current_color.clone(),
        )
        .with_font_name(ctx.current_font_name.clone());
        ctx.current_inlines.push(run);
    }

    let para_pr_id = ctx.current_para_pr_id.take();
    let num_pr_id = ctx.current_num_pr_id.take();
    let pending_lang = ctx.pending_code_lang.take();

    if ctx.current_inlines.is_empty() {
        return None;
    }
    let inlines = std::mem::take(&mut ctx.current_inlines);

    if let Some(language) = pending_lang {
        // A code block keeps only the concatenated text of its inlines.
        let code = inlines
            .into_iter()
            .map(|inline| inline.text)
            .collect::<String>();
        return Some(StagedBlock::Plain(ir::Block::CodeBlock { language, code }));
    }

    // Headings are never treated as list paragraphs.
    if let Some(level) = ctx.heading_level {
        return Some(StagedBlock::Plain(ir::Block::Heading { level, inlines }));
    }

    let block = ir::Block::Paragraph { inlines };
    let list_depth: Option<u32> = match para_pr_id.as_deref() {
        Some("2") => Some(0),
        Some("3") => Some(1),
        Some(other) => other
            .parse::<u32>()
            .ok()
            .filter(|&id| id >= 4)
            .map(|_| 1),
        None => None,
    };

    Some(match list_depth {
        Some(depth) => StagedBlock::ListPara {
            depth,
            ordered: num_pr_id.as_deref() == Some("1"),
            block,
        },
        None => StagedBlock::Plain(block),
    })
}
/// A flushed block before list grouping; consumed by `group_list_paragraphs`.
#[derive(Debug)]
pub(crate) enum StagedBlock {
/// A block that is emitted as-is and ends any run of list paragraphs.
Plain(ir::Block),
/// A paragraph that belongs to a list; consecutive ones are merged into a
/// single `ir::Block::List`.
ListPara {
// 0 is a top-level item; any other value nests under the previous item.
depth: u32,
// Only the first entry of a run decides whether the list is ordered.
ordered: bool,
block: ir::Block,
},
}
/// Collapses each run of consecutive `StagedBlock::ListPara` entries into one
/// list block and passes `StagedBlock::Plain` blocks through unchanged.
///
/// The relative order of the input is preserved.
pub(crate) fn group_list_paragraphs(staged: Vec<StagedBlock>) -> Vec<ir::Block> {
    let mut out: Vec<ir::Block> = Vec::with_capacity(staged.len());
    // List paragraphs collected since the last plain block.
    let mut run: Vec<(u32, bool, ir::Block)> = Vec::new();

    for item in staged {
        match item {
            StagedBlock::ListPara {
                depth,
                ordered,
                block,
            } => run.push((depth, ordered, block)),
            StagedBlock::Plain(block) => {
                // A plain block terminates the current list run.
                if !run.is_empty() {
                    out.push(build_list(std::mem::take(&mut run)));
                }
                out.push(block);
            }
        }
    }

    // Emit a run that reaches the end of the input.
    if !run.is_empty() {
        out.push(build_list(run));
    }
    out
}
/// Builds one `ir::Block::List` from `(depth, ordered, block)` entries.
///
/// The list takes its `ordered` flag from the first entry (`false` when there
/// are no entries) and always starts at 1. A depth-0 entry becomes a new
/// top-level item; any deeper entry becomes a child of the most recent
/// top-level item, or a top-level item itself if none exists yet.
fn build_list(entries: Vec<(u32, bool, ir::Block)>) -> ir::Block {
    let ordered = entries.first().is_some_and(|entry| entry.1);
    let mut items: Vec<ir::ListItem> = Vec::new();

    for (depth, _, block) in entries {
        let item = ir::ListItem {
            blocks: vec![block],
            children: vec![],
        };
        if depth == 0 || items.is_empty() {
            items.push(item);
        } else if let Some(parent) = items.last_mut() {
            parent.children.push(item);
        }
    }

    ir::Block::List {
        ordered,
        start: 1,
        items,
    }
}
pub(crate) fn flush_cell_paragraph(ctx: &mut ParseContext) {
flush_inlines_to_blocks(
&mut ctx.cell_text,
&mut ctx.cell_inlines,
&mut ctx.cell_blocks,
ctx.current_bold,
ctx.current_italic,
ctx.current_underline,
ctx.current_strike,
ctx.current_superscript,
ctx.current_subscript,
&ctx.current_color,
&ctx.current_font_name,
);
}
pub(crate) fn flush_list_item_paragraph(ctx: &mut ParseContext) {
flush_inlines_to_blocks(
&mut ctx.list_item_text,
&mut ctx.list_item_inlines,
&mut ctx.list_item_blocks,
ctx.current_bold,
ctx.current_italic,
ctx.current_underline,
ctx.current_strike,
ctx.current_superscript,
ctx.current_subscript,
&ctx.current_color,
&ctx.current_font_name,
);
}
pub(crate) fn flush_footnote_paragraph(ctx: &mut ParseContext) {
flush_inlines_to_blocks(
&mut ctx.footnote_text,
&mut ctx.footnote_inlines,
&mut ctx.footnote_blocks,
ctx.current_bold,
ctx.current_italic,
ctx.current_underline,
ctx.current_strike,
ctx.current_superscript,
ctx.current_subscript,
&ctx.current_color,
&ctx.current_font_name,
);
}