jw-hwp-core 0.1.2

Read-only parser for Hancom HWP 5.0 (binary CFB) and HWPX (OWPML) documents
Documentation
//! HWPX (OWPML/ZIP-based) parser. Sits beside the binary HWP reader and
//! produces the same `HwpDocument` shape.

pub mod header;
pub mod manifest;
pub mod section;
pub mod writer;
pub mod xml_gen;
pub mod zip_reader;

use crate::error::Error;
use crate::model::HwpDocument;
use std::path::Path;

/// Reads the HWPX package at `path` and assembles it into an [`HwpDocument`].
///
/// The steps, in order:
/// 1. Open the archive and validate its mimetype.
/// 2. Parse `Contents/content.hpf` into the manifest (metadata, assets and the
///    list of section files).
/// 3. Parse `Contents/header.xml` into the shape tables when the archive has
///    that entry; otherwise the shape tables are left at their default value.
/// 4. Fill in `size_bytes` for every asset that does not already carry one.
/// 5. Parse each section file named by the manifest. A section file the
///    archive does not contain is skipped; the index handed to the section
///    parser is the file's position in the manifest list, so it is not
///    renumbered after a skip.
///
/// The returned document has `version` set to `"hwpx"` and default
/// `properties`.
///
/// # Errors
///
/// Propagates any error from opening the archive, validating the mimetype,
/// reading an entry, or parsing the manifest, the header or a section.
pub fn read_hwpx(path: &Path) -> Result<HwpDocument, Error> {
    const HEADER_ENTRY: &str = "Contents/header.xml";

    let mut archive = zip_reader::Archive::open(path)?;
    archive.validate_mimetype()?;

    let package = archive.read_entry("Contents/content.hpf")?;
    let mut manifest = manifest::parse(&package)?;

    // The header is optional: fall back to empty shape tables without it.
    let shapes = if !archive.has_entry(HEADER_ENTRY) {
        Default::default()
    } else {
        let raw_header = archive.read_entry(HEADER_ENTRY)?;
        header::parse(&raw_header)?
    };

    // Backfill missing asset sizes from the zip central directory.
    for asset in manifest
        .assets
        .entries
        .iter_mut()
        .filter(|asset| asset.size_bytes.is_none())
    {
        asset.size_bytes = archive.entry_size(&asset.name);
    }

    let mut sections = Vec::new();
    let mut warnings = Vec::new();
    for (position, section_href) in manifest.section_files.iter().enumerate() {
        // Section files that are listed but absent from the archive are ignored.
        if archive.has_entry(section_href) {
            let raw_section = archive.read_entry(section_href)?;
            let parsed = section::parse(&raw_section, position)?;
            sections.push(parsed.section);
            warnings.extend(parsed.warnings);
        }
    }

    let document = HwpDocument {
        version: "hwpx".into(),
        metadata: manifest.metadata,
        properties: Default::default(),
        shapes,
        sections,
        assets: manifest.assets,
        warnings,
    };
    Ok(document)
}

#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    /// Builds the path of the HWPX fixture called `name` under this crate's
    /// `tests/fixtures/hwpx` directory.
    fn fixture(name: &str) -> PathBuf {
        let mut location = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        location.push("tests/fixtures/hwpx");
        location.push(name);
        location
    }

    /// Opens the formatting showcase document and checks its metadata, char
    /// shapes, paragraphs and first table. Returns early without asserting
    /// anything when the fixture file is not present on disk.
    #[test]
    fn opens_formatting_showcase() {
        let fixture_path = fixture("FormattingShowcase.hwpx");
        if !fixture_path.exists() {
            eprintln!("missing fixture, skipping");
            return;
        }
        let doc = crate::open(&fixture_path).expect("open hwpx");

        let meta = &doc.metadata;
        assert_eq!(meta.author.as_deref(), Some("fff"));
        assert_eq!(meta.last_author.as_deref(), Some("kokyu"));
        assert!(meta.created_at.is_some());
        assert!(meta.modified_at.is_some());

        // The showcase must contain at least one bold and one italic char shape.
        let char_shapes = &doc.shapes.char_shapes;
        assert!(char_shapes.values().any(|shape| shape.bold));
        assert!(char_shapes.values().any(|shape| shape.italic));

        assert!(!doc.sections.is_empty());
        let first_section = &doc.sections[0];
        assert!(!first_section.paragraphs.is_empty());
        assert!(
            !first_section.tables.is_empty(),
            "expected at least one table"
        );

        let first_table = &first_section.tables[0];
        assert_eq!((first_table.rows, first_table.cols), (3, 3));
    }
}