jw-hwp-core 0.1.2

Read-only parser for Hancom HWP 5.0 (binary CFB) and HWPX (OWPML) documents
Documentation
//! Generates HWPX XML strings from the domain model.

use crate::model::HwpDocument;
use crate::shape::Align;

/// Returns the bytes of the MIME type string `application/hwp+zip`.
///
/// NOTE(review): presumably written as the package's `mimetype` entry — confirm
/// against the caller that assembles the archive.
pub fn gen_mimetype() -> Vec<u8> {
    Vec::from("application/hwp+zip")
}

/// Returns the fixed version document: an XML declaration followed by a single
/// empty `hv:HWPVersion` element carrying `version="1.5"`.
pub fn gen_version_xml() -> String {
    const VERSION_XML: &str = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes" ?><hv:HWPVersion xmlns:hv="http://www.hancom.co.kr/hwpml/2011/version" version="1.5"/>"#;
    VERSION_XML.to_owned()
}

/// Returns the fixed application-settings document.
///
/// The only setting emitted is a `ha:CaretPosition` element with `list`, `para`
/// and `pos` all set to `0`.
pub fn gen_settings_xml() -> String {
    String::from(
        r#"<?xml version="1.0" encoding="UTF-8" standalone="yes" ?><ha:HWPApplicationSetting xmlns:ha="http://www.hancom.co.kr/hwpml/2011/app"><ha:CaretPosition list="0" para="0" pos="0"/></ha:HWPApplicationSetting>"#,
    )
}

/// Returns the fixed container document.
///
/// It declares three root files: `Contents/content.hpf`, `Preview/PrvText.txt`
/// and `META-INF/container.rdf`, each with its media type.
pub fn gen_container_xml() -> String {
    const CONTAINER_XML: &str = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes" ?><ocf:container xmlns:ocf="urn:oasis:names:tc:opendocument:xmlns:container" xmlns:hpf="http://www.hancom.co.kr/schema/2011/hpf"><ocf:rootfiles><ocf:rootfile full-path="Contents/content.hpf" media-type="application/hwpml-package+xml"/><ocf:rootfile full-path="Preview/PrvText.txt" media-type="text/plain"/><ocf:rootfile full-path="META-INF/container.rdf" media-type="application/rdf+xml"/></ocf:rootfiles></ocf:container>"#;
    CONTAINER_XML.into()
}

/// Returns the fixed manifest document: an XML declaration followed by an
/// empty `odf:manifest` element.
pub fn gen_manifest_xml() -> String {
    String::from(
        r#"<?xml version="1.0" encoding="UTF-8" standalone="yes" ?><odf:manifest xmlns:odf="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"/>"#,
    )
}

/// Builds the RDF description of the package parts.
///
/// The output always describes `Contents/header.xml`, then one
/// `Contents/section{N}.xml` entry per element of `doc.sections` (N is the
/// zero-based position), and ends with the `Document` type description.
pub fn gen_container_rdf(doc: &HwpDocument) -> String {
    const PROLOGUE: &str = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes" ?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><rdf:Description rdf:about=""><ns0:hasPart xmlns:ns0="http://www.hancom.co.kr/hwpml/2016/meta/pkg#" rdf:resource="Contents/header.xml"/></rdf:Description><rdf:Description rdf:about="Contents/header.xml"><rdf:type rdf:resource="http://www.hancom.co.kr/hwpml/2016/meta/pkg#HeaderFile"/></rdf:Description>"#;
    const EPILOGUE: &str = r#"<rdf:Description rdf:about=""><rdf:type rdf:resource="http://www.hancom.co.kr/hwpml/2016/meta/pkg#Document"/></rdf:Description></rdf:RDF>"#;

    let mut rdf = String::from(PROLOGUE);
    // Only the position of each section matters here; its content is not read.
    rdf.extend(doc.sections.iter().enumerate().map(|(idx, _)| {
        format!(
            r#"<rdf:Description rdf:about=""><ns0:hasPart xmlns:ns0="http://www.hancom.co.kr/hwpml/2016/meta/pkg#" rdf:resource="Contents/section{idx}.xml"/></rdf:Description><rdf:Description rdf:about="Contents/section{idx}.xml"><rdf:type rdf:resource="http://www.hancom.co.kr/hwpml/2016/meta/pkg#SectionFile"/></rdf:Description>"#
        )
    }));
    rdf.push_str(EPILOGUE);
    rdf
}

/// Returns the preview text: at most the first 500 characters (Unicode scalar
/// values, not bytes) of `doc.full_text()`.
pub fn gen_prv_text(doc: &HwpDocument) -> String {
    let full = doc.full_text();
    let mut preview = String::new();
    preview.extend(full.chars().take(500));
    preview
}

/// Namespace declarations shared by the root elements of content.hpf,
/// header.xml and sectionN.xml.
///
/// Per the original author's note, Hancom Office (Hangul) strictly rejects
/// files whose root elements are missing these declarations.
///
/// The value is a single space-separated attribute list with no leading or
/// trailing space. Each `\` below joins the next line onto the literal; the
/// space written before it is the separator between declarations.
const HANCOM_NS: &str = "xmlns:ha=\"http://www.hancom.co.kr/hwpml/2011/app\" \
    xmlns:hp=\"http://www.hancom.co.kr/hwpml/2011/paragraph\" \
    xmlns:hp10=\"http://www.hancom.co.kr/hwpml/2016/paragraph\" \
    xmlns:hs=\"http://www.hancom.co.kr/hwpml/2011/section\" \
    xmlns:hc=\"http://www.hancom.co.kr/hwpml/2011/core\" \
    xmlns:hh=\"http://www.hancom.co.kr/hwpml/2011/head\" \
    xmlns:hhs=\"http://www.hancom.co.kr/hwpml/2011/history\" \
    xmlns:hm=\"http://www.hancom.co.kr/hwpml/2011/master-page\" \
    xmlns:hpf=\"http://www.hancom.co.kr/schema/2011/hpf\" \
    xmlns:dc=\"http://purl.org/dc/elements/1.1/\" \
    xmlns:opf=\"http://www.idpf.org/2007/opf/\" \
    xmlns:ooxmlchart=\"http://www.hancom.co.kr/hwpml/2016/ooxmlchart\" \
    xmlns:hwpunitchar=\"http://www.hancom.co.kr/hwpml/2016/HwpUnitChar\" \
    xmlns:epub=\"http://www.idpf.org/2007/ops\" \
    xmlns:config=\"urn:oasis:names:tc:opendocument:xmlns:config:1.0\"";

pub fn gen_content_hpf(doc: &HwpDocument) -> String {
    let title = doc.metadata.title.as_deref().unwrap_or("");
    let author = doc.metadata.author.as_deref().unwrap_or("MCP");
    let created = doc.metadata.created_at.as_deref().unwrap_or("");
    let modified = doc.metadata.modified_at.as_deref().unwrap_or("");
    format!(
        r#"<?xml version="1.0" encoding="UTF-8" standalone="yes" ?><opf:package {HANCOM_NS} version="" unique-identifier="" id=""><opf:metadata><opf:title>{title}</opf:title><opf:language>ko</opf:language><opf:meta name="creator" content="text">{author}</opf:meta><opf:meta name="lastsaveby" content="text">{author}</opf:meta><opf:meta name="CreatedDate" content="text">{created}</opf:meta><opf:meta name="ModifiedDate" content="text">{modified}</opf:meta></opf:metadata><opf:manifest><opf:item id="header" href="Contents/header.xml" media-type="application/xml"/><opf:item id="section0" href="Contents/section0.xml" media-type="application/xml"/><opf:item id="settings" href="settings.xml" media-type="application/xml"/></opf:manifest><opf:spine><opf:itemref idref="header" linear="yes"/><opf:itemref idref="section0" linear="yes"/></opf:spine></opf:package>"#
    )
}

/// Escapes the characters `&`, `<`, `>` and `"` as XML entity references.
///
/// Every other character (including `'`) is copied through unchanged.
fn xml_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}

/// Maps a paragraph alignment to the attribute value written to
/// `hh:align horizontal="…"`.
///
/// `Align::Unknown` is written as `"LEFT"`, the same value as `Align::Left`.
fn align_str(a: &Align) -> &'static str {
    match a {
        Align::Left | Align::Unknown => "LEFT",
        Align::Center => "CENTER",
        Align::Right => "RIGHT",
        Align::Both => "JUSTIFY",
        Align::Distributed => "DISTRIBUTE",
        Align::Division => "DIVISION",
    }
}

/// Converts a colour stored as `0x00BBGGRR` into an uppercase `#RRGGBB` string.
///
/// The most significant byte of `c` is ignored.
fn color_to_hex(c: u32) -> String {
    // Little-endian byte order puts the red byte first, then green, then blue.
    let [red, green, blue, _unused] = c.to_le_bytes();
    format!("#{red:02X}{green:02X}{blue:02X}")
}

/// Builds `header.xml`: the `hh:head` element with its reference list of
/// font faces, character properties, paragraph properties and border fills.
///
/// * Font faces: one fixed font is emitted for each of the seven language slots.
/// * Character properties: one `hh:charPr` per entry of
///   `doc.shapes.char_shapes`, in ascending id order.
/// * Paragraph properties: one `hh:paraPr` per entry of
///   `doc.shapes.para_shapes`, in ascending id order.
/// * Border fills: a single stub with id `0`, referenced by every `hh:charPr`.
///
/// `secCnt` is always written as `1`.
pub fn gen_header_xml(doc: &HwpDocument) -> String {
    let mut xml = format!(
        r#"<?xml version="1.0" encoding="UTF-8" standalone="yes" ?><hh:head {HANCOM_NS} version="1.5" secCnt="1">"#
    );
    xml.push_str(
        r#"<hh:beginNum page="1" footnote="1" endnote="1" pic="1" tbl="1" equation="1"/>"#,
    );
    xml.push_str("<hh:refList>");

    // fontfaces — single default font for all 7 lang slots
    xml.push_str(r#"<hh:fontfaces itemCnt="7">"#);
    for lang in &[
        "HANGUL", "LATIN", "HANJA", "JAPANESE", "OTHER", "SYMBOL", "USER",
    ] {
        xml.push_str(&format!(
            r#"<hh:fontface lang="{lang}" fontCnt="1"><hh:font id="0" face="함초롬돋움" type="TTF" isEmbedded="0"><hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="4" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/></hh:font></hh:fontface>"#
        ));
    }
    xml.push_str("</hh:fontfaces>");

    // charProperties — sorted by id so the output order is deterministic
    let mut sorted_chars: Vec<_> = doc.shapes.char_shapes.iter().collect();
    sorted_chars.sort_by_key(|(id, _)| *id);
    xml.push_str(&format!(
        r#"<hh:charProperties itemCnt="{}">"#,
        sorted_chars.len()
    ));
    for (&id, cs) in &sorted_chars {
        // Height is the point size times 100. Round instead of truncating so that
        // floating-point error (e.g. 8.2 * 100.0 = 819.999…) cannot shave a unit
        // off the stored size. The float-to-int cast itself saturates.
        let height = (cs.size_pt * 100.0).round() as u32;
        let color = color_to_hex(cs.color);
        xml.push_str(&format!(
            r#"<hh:charPr id="{id}" height="{height}" textColor="{color}" shadeColor="none" useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="0">"#
        ));
        xml.push_str(r#"<hh:fontRef hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>"#);
        xml.push_str(r#"<hh:ratio hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>"#);
        xml.push_str(r#"<hh:spacing hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>"#);
        xml.push_str(r#"<hh:relSz hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>"#);
        xml.push_str(r#"<hh:offset hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>"#);
        if cs.bold {
            xml.push_str("<hh:bold/>");
        }
        if cs.italic {
            xml.push_str("<hh:italic/>");
        }
        // Underline and strikeout elements are always present; the flag only
        // selects between an active value and "NONE".
        let utype = if cs.underline { "BOTTOM" } else { "NONE" };
        xml.push_str(&format!(
            "<hh:underline type=\"{utype}\" shape=\"SOLID\" color=\"#000000\"/>"
        ));
        let sshape = if cs.strikethrough { "SLASH" } else { "NONE" };
        xml.push_str(&format!(
            "<hh:strikeout shape=\"{sshape}\" color=\"#000000\"/>"
        ));
        xml.push_str("<hh:outline type=\"NONE\"/>");
        xml.push_str("<hh:shadow type=\"NONE\" color=\"#B2B2B2\" offsetX=\"10\" offsetY=\"10\"/>");
        xml.push_str("</hh:charPr>");
    }
    xml.push_str("</hh:charProperties>");

    // paraProperties — sorted by id so the output order is deterministic
    let mut sorted_paras: Vec<_> = doc.shapes.para_shapes.iter().collect();
    sorted_paras.sort_by_key(|(id, _)| *id);
    xml.push_str(&format!(
        r#"<hh:paraProperties itemCnt="{}">"#,
        sorted_paras.len()
    ));
    for (&id, ps) in &sorted_paras {
        xml.push_str(&format!(
            r#"<hh:paraPr id="{id}" tabPrIDRef="0" condense="0" fontLineHeight="0" snapToGrid="1" suppressLineNumbers="0" checked="0" textDir="LTR">"#
        ));
        xml.push_str(&format!(
            r#"<hh:align horizontal="{}" vertical="BASELINE"/>"#,
            align_str(&ps.align)
        ));
        xml.push_str(r#"<hh:heading type="NONE" idRef="0" level="0"/>"#);
        xml.push_str(r#"<hh:breakSetting breakLatinWord="KEEP_WORD" breakNonLatinWord="KEEP_WORD" widowOrphan="0" keepWithNext="0" keepLines="0" pageBreakBefore="0" lineWrap="BREAK"/>"#);
        xml.push_str(r#"<hh:autoSpacing eAsianEng="0" eAsianNum="0"/>"#);
        xml.push_str(&format!(
            r#"<hh:margin left="{}" right="{}" indent="{}" prev="{}" next="{}"/>"#,
            ps.left_margin, ps.right_margin, ps.indent, ps.space_before, ps.space_after
        ));
        xml.push_str(&format!(
            r#"<hh:lineSpacing type="PERCENT" value="{}" unit="HWPUNIT"/>"#,
            ps.line_spacing
        ));
        xml.push_str("</hh:paraPr>");
    }
    xml.push_str("</hh:paraProperties>");

    // borderFills — minimal stub (needed by charPr borderFillIDRef)
    xml.push_str("<hh:borderFills itemCnt=\"1\"><hh:borderFill id=\"0\" threeD=\"0\" shadow=\"0\" centerLine=\"NONE\" breakCellSeparateLine=\"0\"><hh:slash type=\"NONE\"/><hh:backSlash type=\"NONE\"/><hh:leftBorder type=\"NONE\" width=\"0.12 mm\" color=\"#000000\"/><hh:rightBorder type=\"NONE\" width=\"0.12 mm\" color=\"#000000\"/><hh:topBorder type=\"NONE\" width=\"0.12 mm\" color=\"#000000\"/><hh:bottomBorder type=\"NONE\" width=\"0.12 mm\" color=\"#000000\"/></hh:borderFill></hh:borderFills>");

    xml.push_str("</hh:refList></hh:head>");
    xml
}

/// Builds the section XML (`hs:sec`) for the first section of `doc`.
///
/// Each entry of the section's `paragraph_details` is written in order. When a
/// table's `id` has the form `<prefix>:<index>` and `<index>` equals the
/// paragraph's position, the table is written at that position instead of the
/// paragraph.
///
/// Only `doc.sections[0]` is rendered. If the document has no sections, an
/// empty `hs:sec` element is returned rather than panicking.
pub fn gen_section_xml(doc: &HwpDocument) -> String {
    let mut xml =
        format!(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes" ?><hs:sec {HANCOM_NS}>"#);

    if let Some(sec) = doc.sections.first() {
        // Index tables by the paragraph position encoded in their id, once,
        // instead of re-formatting an id and scanning every table per paragraph.
        // Tables whose id has no parseable index are ignored, as before.
        let mut tables_by_para = std::collections::HashMap::new();
        for tbl in sec.tables.iter() {
            let para_idx = tbl
                .id
                .split(':')
                .nth(1)
                .and_then(|part| part.parse::<usize>().ok());
            if let Some(idx) = para_idx {
                // Keep the first table when several claim the same position.
                tables_by_para.entry(idx).or_insert(tbl);
            }
        }

        for (i, detail) in sec.paragraph_details.iter().enumerate() {
            match tables_by_para.get(&i) {
                Some(&tbl) => xml.push_str(&gen_table_xml(tbl)),
                None => xml.push_str(&gen_paragraph_xml(detail)),
            }
        }
    }

    xml.push_str("</hs:sec>");
    xml
}

/// Renders one paragraph as an `hp:p` element.
///
/// `d.runs` holds `(start, char_shape_id)` pairs, where `start` is a character
/// offset into `d.text`. Each run covers the text from its own `start` up to
/// the next run's `start` (or the end of the text for the last run) and is
/// written as an `hp:run` that references its character shape.
///
/// When `d.runs` is empty the whole text is written as a single run that uses
/// character shape `0`.
///
/// Offsets are clamped to the text length, and a run whose end would precede
/// its start is written with empty text. Out-of-order or out-of-range offsets
/// therefore cannot cause an arithmetic underflow.
fn gen_paragraph_xml(d: &crate::model::ParagraphDetail) -> String {
    let para_id = d.para_shape_id;
    let mut xml = format!(
        r#"<hp:p paraPrIDRef="{para_id}" styleIDRef="0" pageBreak="0" columnBreak="0" merged="0">"#
    );
    if d.runs.is_empty() {
        // Single run with default shape
        xml.push_str(r#"<hp:run charPrIDRef="0">"#);
        xml.push_str(&format!("<hp:t>{}</hp:t>", xml_escape(&d.text)));
        xml.push_str("</hp:run>");
    } else {
        // Decode the text once so every run can be sliced by character offset
        // without re-walking the string from the beginning.
        let chars: Vec<char> = d.text.chars().collect();
        let total = chars.len();
        for (ri, &(start, csid)) in d.runs.iter().enumerate() {
            let lo = (start as usize).min(total);
            let hi = d
                .runs
                .get(ri + 1)
                .map(|(s, _)| *s as usize)
                .unwrap_or(total)
                .min(total)
                .max(lo);
            let slice: String = chars[lo..hi].iter().collect();
            xml.push_str(&format!(r#"<hp:run charPrIDRef="{csid}">"#));
            xml.push_str(&format!("<hp:t>{}</hp:t>", xml_escape(&slice)));
            xml.push_str("</hp:run>");
        }
    }
    xml.push_str("</hp:p>");
    xml
}

/// Renders a table as an `hp:tbl` element.
///
/// Layout values are fixed: every column and cell is 4000 wide, every cell is
/// 1000 high, and margins and spacing are 0. One `hp:tc` is written for every
/// slot of every row; a slot holding `None` yields an empty `hp:tc`. Cell
/// paragraphs are written as plain text using paragraph and character
/// shape `0`.
fn gen_table_xml(tbl: &crate::table::Table) -> String {
    let mut out = format!(
        r#"<hp:tbl id="0" zOrder="0" numberingType="TABLE" textWrap="TOP_AND_BOTTOM" textFlow="BOTH_SIDES" lock="0" dropcapstyle="None" pageBreak="CELL" repeatHeader="0" rowCnt="{}" colCnt="{}" cellSpacing="0" borderFillIDRef="0" noAdjust="0">"#,
        tbl.rows, tbl.cols
    );

    // Column definitions: equal, fixed default width for every column.
    out.extend(
        (0..tbl.cols).map(|col| format!(r#"<hp:colPr id="{col}" width="4000" isFixed="0"/>"#)),
    );

    for row_cells in &tbl.cells {
        out.push_str("<hp:tr>");
        for slot in row_cells {
            out.push_str("<hp:tc>");
            if let Some(cell) = slot {
                // Position and span come from the cell; size and margins are constants.
                out.push_str(&format!(
                    r#"<hp:cellAddr colAddr="{}" rowAddr="{}"/><hp:cellSpan colSpan="{}" rowSpan="{}"/>"#,
                    cell.col, cell.row, cell.col_span, cell.row_span
                ));
                out.push_str(concat!(
                    r#"<hp:cellSz width="4000" height="1000"/>"#,
                    r#"<hp:cellMargin left="0" right="0" top="0" bottom="0"/>"#,
                    "<hp:subList>",
                ));
                for text in &cell.paragraphs {
                    let escaped = xml_escape(text);
                    out.push_str(&format!(
                        r#"<hp:p paraPrIDRef="0" styleIDRef="0" pageBreak="0" columnBreak="0" merged="0"><hp:run charPrIDRef="0"><hp:t>{escaped}</hp:t></hp:run></hp:p>"#
                    ));
                }
                out.push_str("</hp:subList>");
            }
            out.push_str("</hp:tc>");
        }
        out.push_str("</hp:tr>");
    }

    out.push_str("</hp:tbl>");
    out
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A single added paragraph shows up as text in the section XML and
    /// references paragraph shape 0.
    #[test]
    fn gen_section_produces_valid_xml_with_paragraph() {
        let mut document = HwpDocument::new_for_writing(None);
        document.add_paragraph("Hello", 0, 0, None);

        let section = gen_section_xml(&document);

        assert!(section.contains("<hp:t>Hello</hp:t>"));
        assert!(section.contains("paraPrIDRef=\"0\""));
    }

    /// A bold 12pt character shape is written with a bold marker and a
    /// height of 1200.
    #[test]
    fn gen_header_includes_bold_charpr() {
        let mut document = HwpDocument::new_for_writing(None);
        document.ensure_char_shape(true, false, false, 12.0, 0);

        let header = gen_header_xml(&document);

        assert!(header.contains("<hh:bold/>"));
        assert!(header.contains("height=\"1200\""));
    }

    /// `<`, `>` and `&` are replaced by their entity references.
    #[test]
    fn xml_escape_handles_special_chars() {
        let escaped = xml_escape("a<b>c&d");
        assert_eq!(escaped, "a&lt;b&gt;c&amp;d");
    }
}