use crate::error::Hwp2MdError;
use crate::ir;
use std::collections::HashMap;
use std::io::Write;
use std::path::Path;
use zip::write::SimpleFileOptions;
use zip::ZipWriter;
#[path = "writer_header.rs"]
mod header;
#[path = "writer_section.rs"]
mod section;
#[cfg(test)]
use section::generate_section_xml;
// Lookup from an image reference (a document asset name or an image block's
// `src` string) to the file name the payload is stored under inside the
// archive's `BinData/` folder.
pub(crate) type ImageAssetMap = HashMap<String, String>;
// `DEFAULT_FONT` is always the first entry of the font table; the escapes spell
// the two Hangul syllables U+BC14 U+D0D5. `CODE_FONT` is the face assigned to
// inline code and code blocks.
const DEFAULT_FONT: &str = "\u{BC14}\u{D0D5}"; pub(crate) const CODE_FONT: &str = "Courier New";
// Names of the seven language slots.
// NOTE(review): presumably the per-language font groups emitted by the header
// writer — this file only declares the list; confirm against `writer_header.rs`.
pub(crate) const LANG_SLOTS: [&str; 7] = [
"HANGUL", "LATIN", "HANJA", "JAPANESE", "OTHER", "SYMBOL", "USER",
];
/// One distinct combination of character formatting.
///
/// Used as a hash-map key: every distinct combination found in a document is
/// assigned its own numeric character-property id (see `RefTables`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct CharPrKey {
/// Bold text.
pub(crate) bold: bool,
/// Italic text.
pub(crate) italic: bool,
/// Underlined text.
pub(crate) underline: bool,
/// Struck-through text.
pub(crate) strikethrough: bool,
/// Code formatting; keys built from code always carry `CODE_FONT` as font.
pub(crate) code: bool,
/// Superscript text.
pub(crate) superscript: bool,
/// Subscript text.
pub(crate) subscript: bool,
/// Optional text color, copied verbatim from the inline run.
/// NOTE(review): the string format (e.g. hex RGB) is not visible here.
pub(crate) color: Option<String>,
/// Optional font face; `None` means no explicit face was requested.
pub(crate) font_name: Option<String>,
/// Character height; body text uses 1000.
/// NOTE(review): presumably hundredths of a point — confirm against the header writer.
pub(crate) height: u32,
}
/// Character heights indexed by heading level.
///
/// `CharPrKey::heading` clamps its level to `1..=6`, so index 0 is never read
/// through that path; it holds the same value as body text.
const HEADING_HEIGHTS: [u32; 7] = [1000, 2400, 2000, 1600, 1400, 1200, 1000];

impl CharPrKey {
    /// Unformatted body text: every flag off, no color, no font, height 1000.
    fn plain() -> Self {
        Self {
            bold: false,
            italic: false,
            underline: false,
            strikethrough: false,
            code: false,
            superscript: false,
            subscript: false,
            color: None,
            font_name: None,
            height: 1000,
        }
    }

    /// Builds the key describing one inline run.
    ///
    /// Code runs always use `CODE_FONT`, overriding any font set on the run;
    /// the height is always the body-text height.
    fn from_inline(inline: &ir::Inline) -> Self {
        Self {
            bold: inline.bold,
            italic: inline.italic,
            underline: inline.underline,
            strikethrough: inline.strikethrough,
            code: inline.code,
            superscript: inline.superscript,
            subscript: inline.subscript,
            color: inline.color.clone(),
            font_name: if inline.code {
                Some(CODE_FONT.to_owned())
            } else {
                inline.font_name.clone()
            },
            // Only `height` is left to come from the plain key.
            ..Self::plain()
        }
    }

    /// Key used for the text of fenced code blocks: plain text in `CODE_FONT`
    /// with the `code` flag set.
    fn code_block() -> Self {
        Self {
            code: true,
            font_name: Some(CODE_FONT.to_owned()),
            ..Self::plain()
        }
    }

    /// Key used for heading text: bold, with the height taken from
    /// `HEADING_HEIGHTS`. Levels outside `1..=6` are clamped into that range.
    fn heading(level: u8) -> Self {
        let slot = usize::from(level).clamp(1, 6);
        Self {
            bold: true,
            height: HEADING_HEIGHTS[slot],
            ..Self::plain()
        }
    }
}
/// Lookup tables computed once per document and shared by the header and
/// section writers.
pub(crate) struct RefTables {
/// Numeric id assigned to each distinct character-formatting key; the plain
/// key always has id 0.
pub(crate) char_pr_ids: HashMap<CharPrKey, u32>,
/// Font faces in first-seen order, starting with `DEFAULT_FONT`.
pub(crate) font_names: Vec<String>,
/// Set to 1 by `RefTables::build`.
/// NOTE(review): presumably the id of the border/fill definition referenced
/// by the generated XML — confirm against the header writer.
pub(crate) border_fill_id: u32,
}
impl RefTables {
    /// Scans the whole document and assigns ids to every character-formatting
    /// combination and font face it uses.
    ///
    /// Id 0 is reserved for plain text. Keys found in the document get
    /// consecutive ids in traversal order. The six heading keys and the
    /// code-block key are then registered (if not already present) so that
    /// they can always be looked up, and `CODE_FONT` is added to the font
    /// table for the same reason.
    fn build(doc: &ir::Document) -> Self {
        let mut char_pr_ids: HashMap<CharPrKey, u32> = HashMap::new();
        let mut font_names: Vec<String> = vec![DEFAULT_FONT.to_owned()];
        let mut font_set: std::collections::HashSet<String> = std::collections::HashSet::new();
        font_set.insert(DEFAULT_FONT.to_owned());
        char_pr_ids.insert(CharPrKey::plain(), 0);

        let mut next_id: u32 = 1;
        for section in &doc.sections {
            collect_from_blocks(
                &section.blocks,
                &mut char_pr_ids,
                &mut next_id,
                &mut font_names,
                &mut font_set,
            );
        }

        // Built-in keys are registered after the document's own keys, headings
        // first (levels 1 through 6) and the code-block key last.
        let builtin_keys = (1..=6u8)
            .map(CharPrKey::heading)
            .chain(std::iter::once(CharPrKey::code_block()));
        for key in builtin_keys {
            if let std::collections::hash_map::Entry::Vacant(slot) = char_pr_ids.entry(key) {
                slot.insert(next_id);
                next_id += 1;
            }
        }

        if font_set.insert(CODE_FONT.to_owned()) {
            font_names.push(CODE_FONT.to_owned());
        }

        Self {
            char_pr_ids,
            font_names,
            border_fill_id: 1,
        }
    }

    /// Id of the character properties used for code-block text.
    fn code_block_char_pr_id(&self) -> u32 {
        self.char_pr_id(&CharPrKey::code_block())
    }

    /// Id of the character properties used for a heading of the given level
    /// (levels outside `1..=6` are clamped by `CharPrKey::heading`).
    fn heading_char_pr_id(&self, level: u8) -> u32 {
        self.char_pr_id(&CharPrKey::heading(level))
    }

    /// Id registered for `key`, or 0 (the plain-text id) when the key was
    /// never registered.
    fn char_pr_id(&self, key: &CharPrKey) -> u32 {
        self.char_pr_ids.get(key).copied().unwrap_or(0)
    }
}
/// Walks `blocks` in document order, registering the formatting key and font
/// of every inline run found in headings and paragraphs.
///
/// Containers (tables, block quotes, footnotes, lists) are descended into
/// recursively; code blocks, images, rules and math carry no inline runs and
/// are skipped.
fn collect_from_blocks(
    blocks: &[ir::Block],
    char_pr_ids: &mut HashMap<CharPrKey, u32>,
    next_id: &mut u32,
    font_names: &mut Vec<String>,
    font_set: &mut std::collections::HashSet<String>,
) {
    for node in blocks {
        match node {
            // Leaf blocks without inline runs.
            ir::Block::CodeBlock { .. }
            | ir::Block::Image { .. }
            | ir::Block::HorizontalRule
            | ir::Block::Math { .. } => {}

            ir::Block::Heading { inlines, .. } | ir::Block::Paragraph { inlines } => {
                collect_from_inlines(inlines, char_pr_ids, next_id, font_names, font_set);
            }

            ir::Block::BlockQuote { blocks: nested }
            | ir::Block::Footnote {
                content: nested, ..
            } => {
                collect_from_blocks(nested, char_pr_ids, next_id, font_names, font_set);
            }

            ir::Block::List { items, .. } => {
                for entry in items.iter() {
                    collect_from_blocks(&entry.blocks, char_pr_ids, next_id, font_names, font_set);
                }
            }

            ir::Block::Table { rows, .. } => {
                // Row-major order, same as nested row/cell loops.
                for cell in rows.iter().flat_map(|row| row.cells.iter()) {
                    collect_from_blocks(&cell.blocks, char_pr_ids, next_id, font_names, font_set);
                }
            }
        }
    }
}
fn collect_from_inlines(
inlines: &[ir::Inline],
char_pr_ids: &mut HashMap<CharPrKey, u32>,
next_id: &mut u32,
font_names: &mut Vec<String>,
font_set: &mut std::collections::HashSet<String>,
) {
for inline in inlines {
let key = CharPrKey::from_inline(inline);
if let Some(font) = &key.font_name {
if font_set.insert(font.clone()) {
font_names.push(font.clone());
}
}
if let std::collections::hash_map::Entry::Vacant(e) = char_pr_ids.entry(key) {
e.insert(*next_id);
*next_id += 1;
}
}
}
/// How an image `src` string should be resolved.
#[derive(Debug)]
enum ImageSource<'a> {
    /// Anything that is neither a remote URL nor a recognised data URI; the
    /// whole `src` string is treated as a file-system path.
    FilePath(&'a str),
    /// A base64 `data:image/<subtype>[;params];base64,<payload>` URI.
    DataUri {
        /// Bare image subtype (e.g. `png`, `svg+xml`), without parameters.
        subtype: &'a str,
        /// The still-encoded base64 payload after the comma.
        payload: &'a str,
    },
    /// An `http://` or `https://` URL.
    RemoteUrl,
}

/// Classifies an image `src` string.
///
/// * `http://` / `https://` prefixes yield [`ImageSource::RemoteUrl`].
/// * `data:image/<subtype>[;param…];base64,<payload>` yields
///   [`ImageSource::DataUri`]. Only the token before the first `;` is reported
///   as the subtype, and it must consist solely of ASCII alphanumerics, `+`,
///   `-` or `.`. The subtype is later used to build an archive entry name, so
///   media-type parameters and path separators must not leak into it.
/// * Everything else, including data URIs that fail the checks above, is
///   returned as [`ImageSource::FilePath`] holding the full `src`.
fn classify_image_src(src: &str) -> ImageSource<'_> {
    if src.starts_with("http://") || src.starts_with("https://") {
        return ImageSource::RemoteUrl;
    }

    let is_safe_subtype = |subtype: &str| {
        !subtype.is_empty()
            && subtype
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.'))
    };

    src.strip_prefix("data:image/")
        .and_then(|rest| rest.split_once(','))
        .and_then(|(media, payload)| {
            let media = media.strip_suffix(";base64")?;
            // Drop any `;name=value` parameters that follow the subtype.
            let subtype = media.split(';').next().unwrap_or(media);
            is_safe_subtype(subtype).then_some(ImageSource::DataUri { subtype, payload })
        })
        .unwrap_or(ImageSource::FilePath(src))
}
/// Guesses an image MIME type from the extension of `path`.
///
/// The comparison is ASCII case-insensitive. Paths without an extension, or
/// with an unrecognised one, map to `application/octet-stream`. The extension
/// is taken from the final path component only, so a dot-less file name such
/// as `png` is not mistaken for an extension.
fn mime_from_extension(path: &str) -> &'static str {
    const KNOWN: [(&str, &str); 7] = [
        ("png", "image/png"),
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("gif", "image/gif"),
        ("bmp", "image/bmp"),
        ("svg", "image/svg+xml"),
        ("webp", "image/webp"),
    ];

    std::path::Path::new(path)
        .extension()
        .and_then(|ext| ext.to_str())
        .and_then(|ext| {
            KNOWN
                .iter()
                .find(|(known, _)| ext.eq_ignore_ascii_case(known))
        })
        .map_or("application/octet-stream", |&(_, mime)| mime)
}
/// A binary payload that will be stored in the archive under `BinData/`.
#[derive(Debug)]
struct ResolvedAsset {
/// File name inside `BinData/` (no directory part is added here).
entry_name: String,
/// Raw bytes written to the archive entry.
data: Vec<u8>,
// Recorded when the asset is resolved but not read anywhere in this file,
// hence the lint override.
// NOTE(review): may be read by code outside this view — confirm before removing.
#[allow(dead_code)]
mime_type: String,
}
fn collect_image_assets(doc: &ir::Document) -> (ImageAssetMap, Vec<ResolvedAsset>) {
let mut asset_map: ImageAssetMap = HashMap::new();
let mut resolved: Vec<ResolvedAsset> = Vec::new();
let mut counter: u32 = 0;
for asset in &doc.assets {
let entry_name = std::path::Path::new(&asset.name)
.file_name()
.map(|n| n.to_string_lossy().into_owned())
.unwrap_or_else(|| {
counter += 1;
format!("asset_{counter}")
});
asset_map
.entry(asset.name.clone())
.or_insert_with(|| entry_name.clone());
if !resolved.iter().any(|r| r.entry_name == entry_name) {
resolved.push(ResolvedAsset {
entry_name,
data: asset.data.clone(),
mime_type: asset.mime_type.clone(),
});
}
}
for section in &doc.sections {
collect_images_from_blocks(§ion.blocks, &mut asset_map, &mut resolved, &mut counter);
}
(asset_map, resolved)
}
/// Returns an entry name that does not clash with any name in `resolved`.
///
/// `preferred` is returned unchanged when it is free. Otherwise a numeric
/// suffix is inserted before the last `.` (`photo.png` → `photo_2.png`,
/// `photo_3.png`, …; names without a dot get the suffix appended) and the
/// first free candidate is returned.
///
/// The search always terminates: `resolved` is finite, so among any
/// `resolved.len() + 1` distinct candidates at least one is unused. There is
/// therefore no iteration cap that could hand back a colliding name.
fn unique_entry_name(preferred: &str, resolved: &[ResolvedAsset]) -> String {
    let is_taken = |name: &str| resolved.iter().any(|r| r.entry_name == name);

    if !is_taken(preferred) {
        return preferred.to_owned();
    }

    let (stem, ext) = match preferred.rsplit_once('.') {
        Some((stem, ext)) => (stem, format!(".{ext}")),
        None => (preferred, String::new()),
    };

    let mut n: u64 = 2;
    loop {
        let candidate = format!("{stem}_{n}{ext}");
        if !is_taken(candidate.as_str()) {
            return candidate;
        }
        n += 1;
    }
}
/// Walks `blocks` and embeds every image whose `src` is not yet in
/// `asset_map`.
///
/// Containers (tables, block quotes, footnotes, lists) are searched
/// recursively. Images that cannot be resolved (remote URLs, undecodable data
/// URIs, unreadable files) are left out of the map; failures are logged as
/// warnings and never abort the walk.
fn collect_images_from_blocks(
    blocks: &[ir::Block],
    asset_map: &mut ImageAssetMap,
    resolved: &mut Vec<ResolvedAsset>,
    counter: &mut u32,
) {
    for block in blocks {
        match block {
            ir::Block::Image { src, .. } => {
                if !asset_map.contains_key(src.as_str()) {
                    embed_image_source(src, asset_map, resolved, counter);
                }
            }
            ir::Block::Table { rows, .. } => {
                for row in rows {
                    for cell in &row.cells {
                        collect_images_from_blocks(&cell.blocks, asset_map, resolved, counter);
                    }
                }
            }
            ir::Block::BlockQuote { blocks: inner }
            | ir::Block::Footnote { content: inner, .. } => {
                collect_images_from_blocks(inner, asset_map, resolved, counter);
            }
            ir::Block::List { items, .. } => {
                for item in items {
                    collect_images_from_blocks(&item.blocks, asset_map, resolved, counter);
                }
            }
            ir::Block::Heading { .. }
            | ir::Block::Paragraph { .. }
            | ir::Block::CodeBlock { .. }
            | ir::Block::HorizontalRule
            | ir::Block::Math { .. } => {}
        }
    }
}

/// Resolves a single image `src` into an embedded payload and records it in
/// `asset_map` / `resolved`; does nothing when the source cannot be resolved.
///
/// Both data-URI and file images go through `unique_entry_name`, so a
/// generated `image_N.ext` name can never duplicate an entry that is already
/// queued for the archive.
fn embed_image_source(
    src: &str,
    asset_map: &mut ImageAssetMap,
    resolved: &mut Vec<ResolvedAsset>,
    counter: &mut u32,
) {
    // `label` is what a collision warning shows: the path for files, a fixed
    // placeholder for data URIs (their `src` can be arbitrarily long).
    let (label, preferred, data, mime_type) = match classify_image_src(src) {
        // Remote images are never downloaded.
        ImageSource::RemoteUrl => return,
        ImageSource::DataUri { subtype, payload } => match base64_decode(payload) {
            Ok(bytes) => {
                *counter += 1;
                (
                    "<data URI>",
                    format!("image_{counter}.{subtype}"),
                    bytes,
                    format!("image/{subtype}"),
                )
            }
            Err(e) => {
                tracing::warn!("Failed to decode data URI for image: {e}");
                return;
            }
        },
        ImageSource::FilePath(path) => match std::fs::read(path) {
            Ok(bytes) => {
                let bare = std::path::Path::new(path)
                    .file_name()
                    .map(|n| n.to_string_lossy().into_owned())
                    .unwrap_or_else(|| {
                        *counter += 1;
                        format!("image_{counter}")
                    });
                (path, bare, bytes, mime_from_extension(path).to_owned())
            }
            Err(e) => {
                tracing::warn!("Cannot read image file {path:?}: {e}");
                return;
            }
        },
    };

    let entry_name = unique_entry_name(&preferred, resolved);
    if entry_name != preferred {
        tracing::warn!(
            "Image filename collision: {label:?} \
             renamed to {entry_name:?} to avoid \
             overwriting an earlier BinData entry"
        );
    }

    asset_map.insert(src.to_owned(), entry_name.clone());
    resolved.push(ResolvedAsset {
        entry_name,
        data,
        mime_type,
    });
}
/// Decodes standard-alphabet base64 (`A–Z a–z 0–9 + /`).
///
/// ASCII whitespace and `=` padding characters are ignored wherever they
/// appear, so both padded and unpadded input are accepted.
///
/// # Errors
///
/// Returns `Hwp2MdError::HwpxWrite` when the input contains a character
/// outside the alphabet, or when the number of data characters leaves a
/// single dangling symbol (length ≡ 1 mod 4). Such a symbol carries only six
/// bits and can only come from truncated or corrupt data.
fn base64_decode(input: &str) -> Result<Vec<u8>, Hwp2MdError> {
    // Reverse alphabet: byte value -> 6-bit symbol value, or -1 when invalid.
    const TABLE: [i8; 256] = {
        let mut t = [-1i8; 256];
        let mut i = 0usize;
        while i < 26 {
            t[b'A' as usize + i] = i as i8;
            i += 1;
        }
        i = 0;
        while i < 26 {
            t[b'a' as usize + i] = (i + 26) as i8;
            i += 1;
        }
        i = 0;
        while i < 10 {
            t[b'0' as usize + i] = (i + 52) as i8;
            i += 1;
        }
        t[b'+' as usize] = 62;
        t[b'/' as usize] = 63;
        t
    };

    // Negative table entries fail the conversion and become the error.
    let sextet = |byte: u8| -> Result<u32, Hwp2MdError> {
        u32::try_from(TABLE[usize::from(byte)])
            .map_err(|_| Hwp2MdError::HwpxWrite("invalid base64 character in data URI".into()))
    };

    let symbols: Vec<u8> = input
        .bytes()
        .filter(|b| !b.is_ascii_whitespace() && *b != b'=')
        .collect();

    if symbols.len() % 4 == 1 {
        return Err(Hwp2MdError::HwpxWrite(
            "invalid base64 length in data URI".into(),
        ));
    }

    let mut out = Vec::with_capacity(symbols.len() / 4 * 3 + 2);
    for group in symbols.chunks(4) {
        // Pack up to four 6-bit symbols into the top of a 24-bit word.
        let mut word = 0u32;
        for (pos, &byte) in group.iter().enumerate() {
            word |= sextet(byte)? << (18 - 6 * pos);
        }
        // 4 symbols -> 3 bytes, 3 -> 2, 2 -> 1; byte 0 of the big-endian
        // representation is always zero and is skipped.
        let bytes = word.to_be_bytes();
        out.extend_from_slice(&bytes[1..group.len()]);
    }
    Ok(out)
}
/// Writes `doc` to `output` as an HWPX (zip) package.
///
/// Entries are written in this order: an uncompressed `mimetype`, then
/// deflated `META-INF/container.xml`, `version.xml`, `Contents/header.xml`,
/// `Contents/content.hpf`, one `Contents/section{N}.xml` per section (a single
/// empty section when the document has none), and finally every embedded
/// payload under `BinData/`.
///
/// The `_style` argument is accepted but currently unused.
///
/// # Errors
///
/// Propagates file-creation, zip and XML-generation failures.
pub fn write_hwpx(
    doc: &ir::Document,
    output: &Path,
    _style: Option<&Path>,
) -> Result<(), Hwp2MdError> {
    let tables = RefTables::build(doc);
    let (asset_map, resolved_assets) = collect_image_assets(doc);

    let mut archive = ZipWriter::new(std::fs::File::create(output)?);
    let stored = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
    let deflated =
        SimpleFileOptions::default().compression_method(zip::CompressionMethod::Deflated);

    // The mimetype entry comes first and is the only one left uncompressed.
    archive.start_file("mimetype", stored)?;
    archive.write_all(b"application/hwp+zip")?;

    archive.start_file("META-INF/container.xml", deflated)?;
    archive.write_all(generate_container_xml().as_bytes())?;

    archive.start_file("version.xml", deflated)?;
    archive.write_all(generate_version_xml().as_bytes())?;

    archive.start_file("Contents/header.xml", deflated)?;
    archive.write_all(header::generate_header_xml(doc, &tables)?.as_bytes())?;

    archive.start_file("Contents/content.hpf", deflated)?;
    archive.write_all(generate_content_hpf(doc, &resolved_assets).as_bytes())?;

    // A document without sections still gets one empty `section0.xml`.
    let placeholder = [ir::Section {
        blocks: Vec::new(),
        page_layout: None,
    }];
    let sections: &[ir::Section] = if doc.sections.is_empty() {
        &placeholder
    } else {
        &doc.sections
    };
    for (index, sec) in sections.iter().enumerate() {
        archive.start_file(format!("Contents/section{index}.xml"), deflated)?;
        let xml = section::generate_section_xml(sec, index, &tables, &asset_map)?;
        archive.write_all(xml.as_bytes())?;
    }

    for asset in &resolved_assets {
        archive.start_file(format!("BinData/{}", asset.entry_name), deflated)?;
        archive.write_all(&asset.data)?;
    }

    archive.finish()?;
    Ok(())
}
/// Returns the fixed `META-INF/container.xml` document, which names
/// `Contents/content.hpf` as the package root file.
fn generate_container_xml() -> String {
    const CONTAINER_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0">
<rootfiles>
<rootfile full-path="Contents/content.hpf" media-type="application/hwp+xml"/>
</rootfiles>
</container>"#;
    String::from(CONTAINER_XML)
}
/// Returns the fixed `version.xml` document, declaring compatible-document
/// version 1.1 and HWP version 5.1.0.0.
fn generate_version_xml() -> String {
    const VERSION_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<hh:HWPCompatibleDocument xmlns:hh="http://www.hancom.co.kr/hwpml/2011/head" version="1.1">
<hh:DocInfo>
<hh:HWPVersion Major="5" Minor="1" Micro="0" Build="0"/>
</hh:DocInfo>
</hh:HWPCompatibleDocument>"#;
    String::from(VERSION_XML)
}
/// Builds the `Contents/content.hpf` package manifest.
///
/// The manifest lists one `section{N}.xml` item per section (at least one,
/// even for an empty document), an optional `docInfo` element holding the
/// non-empty title and/or author, and one `binData` element per embedded
/// payload. The `itemId` of a payload is its entry name without the
/// extension. All interpolated text goes through `xml_escape_content`.
fn generate_content_hpf(doc: &ir::Document, resolved_assets: &[ResolvedAsset]) -> String {
    let items: String = (0..doc.sections.len().max(1))
        .map(|i| format!(" <hp:item href=\"section{i}.xml\" type=\"Section\"/>\n"))
        .collect();

    // Empty strings count as "not set".
    let title = doc.metadata.title.as_deref().filter(|t| !t.is_empty());
    let author = doc.metadata.author.as_deref().filter(|a| !a.is_empty());

    let doc_info = if title.is_none() && author.is_none() {
        String::new()
    } else {
        let mut info = String::from(" <hp:docInfo>\n");
        if let Some(text) = title {
            info.push_str(&format!(
                " <hp:title>{}</hp:title>\n",
                xml_escape_content(text)
            ));
        }
        if let Some(text) = author {
            info.push_str(&format!(
                " <hp:author>{}</hp:author>\n",
                xml_escape_content(text)
            ));
        }
        info.push_str(" </hp:docInfo>\n");
        info
    };

    let bin_data_entries: String = resolved_assets
        .iter()
        .map(|asset| {
            let item_id = std::path::Path::new(&asset.entry_name)
                .file_stem()
                .map_or_else(
                    || asset.entry_name.clone(),
                    |stem| stem.to_string_lossy().into_owned(),
                );
            format!(
                " <hp:binData itemId=\"{}\" file=\"BinData/{}\" type=\"EMBED\" compress=\"true\"/>\n",
                xml_escape_content(&item_id),
                xml_escape_content(&asset.entry_name),
            )
        })
        .collect();

    format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<hp:HWPMLPackage xmlns:hp="http://www.hancom.co.kr/hwpml/2011/packageInfo">
<hp:compatibledocument version="1.1"/>
{doc_info} <hp:contents>
{items} </hp:contents>
{bin_data_entries}</hp:HWPMLPackage>"#
    )
}
/// Escapes the five XML special characters so `s` can be embedded safely in
/// element content or in a quoted attribute value.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by their predefined entities; every
/// other character is copied unchanged. The input is scanned once, so text
/// that is already escaped gets escaped again (`&amp;` becomes `&amp;amp;`).
fn xml_escape_content(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
#[cfg(test)]
#[path = "writer_tests.rs"]
mod tests;