use crate::ir;
use super::context::{
apply_charpr_attrs, flush_cell_paragraph, flush_footnote_paragraph, flush_list_item_paragraph,
flush_paragraph, ParseContext, RubyPart,
};
pub(super) fn handle_start_element(
local: &str,
e: &quick_xml::events::BytesStart,
ctx: &mut ParseContext,
) {
match local {
"p" | "hp:p" => {
ctx.in_paragraph = true;
ctx.current_text.clear();
ctx.current_inlines.clear();
ctx.heading_level = None;
for attr in e.attributes().flatten() {
let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or("");
if key == "styleIDRef" || key == "hp:styleIDRef" {
let val = attr.unescape_value().unwrap_or_default().to_string();
if let Some(level) = parse_heading_style(&val) {
ctx.heading_level = Some(level);
}
}
}
}
"run" | "hp:run" => {
ctx.in_run = true;
ctx.current_bold = false;
ctx.current_italic = false;
ctx.current_underline = false;
ctx.current_strike = false;
}
"charPr" | "hp:charPr" => apply_charpr_attrs(e, ctx),
"t" | "hp:t" => {}
"tbl" | "hp:tbl" => {
ctx.in_table = true;
ctx.table_rows.clear();
ctx.col_count = 0;
for attr in e.attributes().flatten() {
let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or("");
if key == "colCnt" || key == "hp:colCnt" {
if let Ok(n) = attr.unescape_value().unwrap_or_default().parse::<usize>() {
ctx.col_count = n;
}
}
}
}
"tr" | "hp:tr" => {
ctx.current_row_cells.clear();
}
"tc" | "hp:tc" => {
ctx.in_cell = true;
ctx.cell_blocks.clear();
ctx.cell_inlines.clear();
ctx.cell_text.clear();
ctx.current_colspan = 1;
ctx.current_rowspan = 1;
}
"ol" => {
ctx.in_list = true;
ctx.list_ordered = true;
ctx.list_items.clear();
}
"ul" => {
ctx.in_list = true;
ctx.list_ordered = false;
ctx.list_items.clear();
}
"li" | "hp:li" => {
ctx.in_list_item = true;
ctx.list_item_blocks.clear();
ctx.list_item_inlines.clear();
ctx.list_item_text.clear();
}
"equation" | "hp:equation" | "eqEdit" | "hp:eqEdit" => {
ctx.in_equation = true;
ctx.equation_text.clear();
}
"ruby" | "hp:ruby" => {
ctx.in_ruby = true;
ctx.ruby_base_text.clear();
ctx.ruby_annotation_text.clear();
ctx.ruby_current_part = RubyPart::None;
}
"rubyText" | "hp:rubyText" => {
ctx.ruby_current_part = RubyPart::Annotation;
}
"baseText" | "hp:baseText" => {
ctx.ruby_current_part = RubyPart::Base;
}
"fn" | "hp:fn" | "footnote" | "hp:footnote" | "en" | "hp:en" | "endnote" | "hp:endnote" => {
let mut id = String::new();
for attr in e.attributes().flatten() {
let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or("");
if key == "id" || key == "hp:id" || key == "noteId" || key == "hp:noteId" {
id = attr.unescape_value().unwrap_or_default().to_string();
break;
}
}
ctx.in_footnote = true;
ctx.footnote_id = id;
ctx.footnote_blocks.clear();
ctx.footnote_inlines.clear();
ctx.footnote_text.clear();
}
_ => {}
}
}
pub(super) fn handle_end_element(local: &str, ctx: &mut ParseContext, section: &mut ir::Section) {
match local {
"p" | "hp:p" => {
if ctx.in_footnote {
flush_footnote_paragraph(ctx);
} else if ctx.in_cell {
flush_cell_paragraph(ctx);
} else if ctx.in_list_item {
flush_list_item_paragraph(ctx);
} else {
flush_paragraph(ctx, section);
}
ctx.in_paragraph = false;
}
"run" | "hp:run" => {
ctx.in_run = false;
}
"t" | "hp:t" if !ctx.current_text.is_empty() => {
let text = std::mem::take(&mut ctx.current_text);
let inline = ir::Inline {
text,
bold: ctx.current_bold,
italic: ctx.current_italic,
underline: ctx.current_underline,
strikethrough: ctx.current_strike,
..ir::Inline::default()
};
ctx.push_inline(inline);
}
"tbl" | "hp:tbl" => {
let col_count = ctx.col_count.max(
ctx.table_rows
.iter()
.map(|r| r.cells.len())
.max()
.unwrap_or(0),
);
if !ctx.table_rows.is_empty() {
let rows = std::mem::take(&mut ctx.table_rows);
section.blocks.push(ir::Block::Table { rows, col_count });
}
ctx.in_table = false;
}
"tr" | "hp:tr" => {
let cells = std::mem::take(&mut ctx.current_row_cells);
ctx.table_rows.push(ir::TableRow {
cells,
is_header: ctx.table_rows.is_empty(),
});
}
"tc" | "hp:tc" => {
flush_cell_paragraph(ctx);
let blocks = std::mem::take(&mut ctx.cell_blocks);
ctx.current_row_cells.push(ir::TableCell {
blocks,
colspan: ctx.current_colspan,
rowspan: ctx.current_rowspan,
});
ctx.in_cell = false;
}
"li" | "hp:li" => {
flush_list_item_paragraph(ctx);
let blocks = std::mem::take(&mut ctx.list_item_blocks);
ctx.list_items.push(ir::ListItem {
blocks,
children: Vec::new(),
});
ctx.in_list_item = false;
}
"ol" | "ul" => {
if !ctx.list_items.is_empty() {
let items = std::mem::take(&mut ctx.list_items);
section.blocks.push(ir::Block::List {
ordered: ctx.list_ordered,
start: 1,
items,
});
}
ctx.in_list = false;
}
"equation" | "hp:equation" | "eqEdit" | "hp:eqEdit" => {
if !ctx.equation_text.is_empty() {
let tex = std::mem::take(&mut ctx.equation_text);
section.blocks.push(ir::Block::Math { display: true, tex });
}
ctx.in_equation = false;
}
"rubyText" | "hp:rubyText" | "baseText" | "hp:baseText" => {
ctx.ruby_current_part = RubyPart::None;
}
"ruby" | "hp:ruby" => {
let base = std::mem::take(&mut ctx.ruby_base_text);
let annotation = std::mem::take(&mut ctx.ruby_annotation_text);
if !base.is_empty() || !annotation.is_empty() {
let inline = ir::Inline {
text: base,
ruby: if annotation.is_empty() {
None
} else {
Some(annotation)
},
..ir::Inline::default()
};
ctx.push_inline(inline);
}
ctx.in_ruby = false;
ctx.ruby_current_part = RubyPart::None;
}
"fn" | "hp:fn" | "footnote" | "hp:footnote" | "en" | "hp:en" | "endnote" | "hp:endnote" => {
flush_footnote_paragraph(ctx);
if !ctx.footnote_blocks.is_empty() {
let id = std::mem::take(&mut ctx.footnote_id);
let content = std::mem::take(&mut ctx.footnote_blocks);
section.blocks.push(ir::Block::Footnote { id, content });
} else {
ctx.footnote_id.clear();
}
ctx.in_footnote = false;
}
_ => {}
}
}
pub(super) fn handle_text(text: &str, ctx: &mut ParseContext) {
if ctx.in_equation {
ctx.equation_text.push_str(text);
return;
}
if ctx.in_ruby {
match ctx.ruby_current_part {
RubyPart::Base => ctx.ruby_base_text.push_str(text),
RubyPart::Annotation => ctx.ruby_annotation_text.push_str(text),
RubyPart::None => {}
}
return;
}
if ctx.in_run {
ctx.active_text_buf().push_str(text);
}
}
pub(super) fn handle_empty_element(
local: &str,
e: &quick_xml::events::BytesStart,
ctx: &mut ParseContext,
section: &mut ir::Section,
) {
match local {
"img" | "hp:img" | "picture" | "hp:picture" => {
let mut src = String::new();
let mut alt = String::new();
for attr in e.attributes().flatten() {
let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or("");
let val = attr.unescape_value().unwrap_or_default().to_string();
match key {
"src" | "href" | "hp:href" | "binaryItemIDRef" | "hp:binaryItemIDRef" => {
src = val;
}
"alt" => alt = val,
_ => {}
}
}
if !src.is_empty() {
let img = ir::Block::Image { src, alt };
if let Some(block) = ctx.push_block_scoped(img) {
section.blocks.push(block);
}
}
}
"lineBreak" | "hp:lineBreak" => {
ctx.active_text_buf().push('\n');
}
"cellAddr" | "hp:cellAddr" => {
for attr in e.attributes().flatten() {
let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or("");
let val = attr.unescape_value().unwrap_or_default();
match key {
"colSpan" | "hp:colSpan" => {
if let Ok(n) = val.parse::<u32>() {
if n >= 1 {
ctx.current_colspan = n;
}
}
}
"rowSpan" | "hp:rowSpan" => {
if let Ok(n) = val.parse::<u32>() {
if n >= 1 {
ctx.current_rowspan = n;
}
}
}
_ => {}
}
}
}
"charPr" | "hp:charPr" => apply_charpr_attrs(e, ctx),
"noteRef" | "hp:noteRef" => {
let mut note_id = String::new();
for attr in e.attributes().flatten() {
let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or("");
if key == "noteId" || key == "hp:noteId" || key == "id" || key == "hp:id" {
note_id = attr.unescape_value().unwrap_or_default().to_string();
break;
}
}
if !note_id.is_empty() {
let inline = ir::Inline {
footnote_ref: Some(note_id),
..ir::Inline::default()
};
ctx.push_inline(inline);
}
}
"ctrl" | "hp:ctrl" => {
let mut ctrl_kind = String::new();
let mut id_ref = String::new();
for attr in e.attributes().flatten() {
let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or("");
let val = attr.unescape_value().unwrap_or_default().to_string();
match key {
"id" | "hp:id" => ctrl_kind = val,
"idRef" | "hp:idRef" => id_ref = val,
_ => {}
}
}
if (ctrl_kind == "fn" || ctrl_kind == "en") && !id_ref.is_empty() {
let inline = ir::Inline {
footnote_ref: Some(id_ref),
..ir::Inline::default()
};
ctx.push_inline(inline);
}
}
_ => {}
}
}
pub(crate) fn parse_heading_style(style_ref: &str) -> Option<u8> {
let lower = style_ref.to_lowercase();
if lower.contains("heading") || lower.contains("제목") || lower.contains("개요") {
let num_str: String = style_ref
.chars()
.rev()
.take_while(|c| c.is_ascii_digit())
.collect::<String>()
.chars()
.rev()
.collect();
if let Ok(n) = num_str.parse::<u8>() {
if (1..=6).contains(&n) {
return Some(n);
}
}
return Some(1);
}
None
}