// jw-hwp-core 0.1.2
//
// Read-only parser for Hancom HWP 5.0 (binary CFB) and HWPX (OWPML) documents.
// Documentation
use crate::container::Container;
use crate::error::Error;
use crate::ole_property::{parse, PropValue};

/// Name of the container stream that [`read`] loads; note the leading
/// U+0005 control character in front of `HwpSummaryInformation`.
pub const STREAM_NAME: &str = "/\u{0005}HwpSummaryInformation";

/// Property id whose string value [`read`] stores in `Metadata::title`.
pub const PIDSI_TITLE: u32 = 0x02;
/// Property id whose string value [`read`] stores in `Metadata::subject`.
pub const PIDSI_SUBJECT: u32 = 0x03;
/// Property id whose string value [`read`] stores in `Metadata::author`.
pub const PIDSI_AUTHOR: u32 = 0x04;
/// Property id whose string value [`read`] stores in `Metadata::keywords`.
pub const PIDSI_KEYWORDS: u32 = 0x05;
/// Property id whose string value [`read`] stores in `Metadata::comments`.
pub const PIDSI_COMMENTS: u32 = 0x06;
/// Property id whose string value [`read`] stores in `Metadata::last_author`.
pub const PIDSI_LAST_AUTHOR: u32 = 0x08;
/// Property id whose string value [`read`] stores in `Metadata::revision`.
pub const PIDSI_REVISION: u32 = 0x09;
/// Property id whose file-time value [`read`] converts into `Metadata::printed_at`.
pub const PIDSI_LAST_PRINTED: u32 = 0x0B;
/// Property id whose file-time value [`read`] converts into `Metadata::created_at`.
pub const PIDSI_CREATE_DTM: u32 = 0x0C;
/// Property id whose file-time value [`read`] converts into `Metadata::modified_at`.
pub const PIDSI_LAST_SAVE_DTM: u32 = 0x0D;
/// Property id whose 32-bit integer value [`read`] stores in `Metadata::page_count`.
pub const PIDSI_PAGE_COUNT: u32 = 0x0E;
/// Property id whose string value [`read`] stores in `Metadata::date_string`.
/// NOTE(review): the `HWP` prefix suggests a Hancom-specific extension — confirm against the format spec.
pub const HWPPIDSI_DATE_STR: u32 = 0x14;
/// Property id whose 32-bit integer value [`read`] stores in `Metadata::para_count`.
/// NOTE(review): the `HWP` prefix suggests a Hancom-specific extension — confirm against the format spec.
pub const HWPPIDSI_PARACOUNT: u32 = 0x15;

/// Document summary properties collected by [`read`].
///
/// Every field is optional: it stays `None` when the corresponding property
/// is absent from the stream or carries a value of an unexpected type.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize)]
pub struct Metadata {
    /// String value of property `PIDSI_TITLE`.
    pub title: Option<String>,
    /// String value of property `PIDSI_SUBJECT`.
    pub subject: Option<String>,
    /// String value of property `PIDSI_AUTHOR`.
    pub author: Option<String>,
    /// String value of property `PIDSI_KEYWORDS`.
    pub keywords: Option<String>,
    /// String value of property `PIDSI_COMMENTS`.
    pub comments: Option<String>,
    /// String value of property `PIDSI_LAST_AUTHOR`.
    pub last_author: Option<String>,
    /// String value of property `PIDSI_REVISION` (kept as text, not parsed).
    pub revision: Option<String>,
    /// String value of property `HWPPIDSI_DATE_STR`, stored verbatim.
    pub date_string: Option<String>,
    /// Integer value of property `PIDSI_PAGE_COUNT`.
    pub page_count: Option<i32>,
    /// Integer value of property `HWPPIDSI_PARACOUNT`.
    pub para_count: Option<i32>,
    /// `PIDSI_CREATE_DTM` rendered by `filetime_to_iso` as `YYYY-MM-DDTHH:MM:SSZ`.
    pub created_at: Option<String>,
    /// `PIDSI_LAST_SAVE_DTM` rendered by `filetime_to_iso` as `YYYY-MM-DDTHH:MM:SSZ`.
    pub modified_at: Option<String>,
    /// `PIDSI_LAST_PRINTED` rendered by `filetime_to_iso` as `YYYY-MM-DDTHH:MM:SSZ`.
    pub printed_at: Option<String>,
}

/// Reads the summary-information properties of the document in `container`.
///
/// Loads the raw stream named [`STREAM_NAME`], parses it into property
/// sections and copies every recognised property into a [`Metadata`].
/// A property is skipped when its id is not one of the known constants or
/// when its value type does not match the type expected for that id. When an
/// id occurs several times, the last occurrence wins.
///
/// # Errors
///
/// A missing stream is not an error: it yields `Metadata::default()`.
/// Any other failure while reading the stream, and any failure while parsing
/// it, is returned unchanged.
pub fn read(container: &mut Container) -> Result<Metadata, Error> {
    let raw = match container.read_raw_stream(STREAM_NAME) {
        // Documents without a summary stream simply have no metadata.
        Err(Error::MissingStream(_)) => return Ok(Metadata::default()),
        outcome => outcome?,
    };
    let parsed_sections = parse(&raw)?;

    let mut meta = Metadata::default();
    for section in parsed_sections {
        for (pid, value) in section.properties {
            // Dispatch on the value type first, then pick the destination
            // field from the property id.
            match &value {
                PropValue::String(text) => {
                    let slot = match pid {
                        PIDSI_TITLE => &mut meta.title,
                        PIDSI_SUBJECT => &mut meta.subject,
                        PIDSI_AUTHOR => &mut meta.author,
                        PIDSI_KEYWORDS => &mut meta.keywords,
                        PIDSI_COMMENTS => &mut meta.comments,
                        PIDSI_LAST_AUTHOR => &mut meta.last_author,
                        PIDSI_REVISION => &mut meta.revision,
                        HWPPIDSI_DATE_STR => &mut meta.date_string,
                        _ => continue,
                    };
                    *slot = Some(text.clone());
                }
                PropValue::I4(number) => {
                    let slot = match pid {
                        PIDSI_PAGE_COUNT => &mut meta.page_count,
                        HWPPIDSI_PARACOUNT => &mut meta.para_count,
                        _ => continue,
                    };
                    *slot = Some(*number);
                }
                PropValue::FileTime(ticks) => {
                    let slot = match pid {
                        PIDSI_CREATE_DTM => &mut meta.created_at,
                        PIDSI_LAST_SAVE_DTM => &mut meta.modified_at,
                        PIDSI_LAST_PRINTED => &mut meta.printed_at,
                        _ => continue,
                    };
                    // The conversion may yield `None`, which then replaces
                    // whatever an earlier occurrence had stored.
                    *slot = filetime_to_iso(*ticks);
                }
                _ => {}
            }
        }
    }
    Ok(meta)
}

pub fn filetime_to_iso(ticks_100ns: u64) -> Option<String> {
    if ticks_100ns == 0 {
        return None;
    }
    const UNIX_EPOCH_OFFSET_SECONDS: u64 = 11_644_473_600;
    const TICKS_PER_SEC: u64 = 10_000_000;
    let secs_since_filetime_epoch = ticks_100ns / TICKS_PER_SEC;
    if secs_since_filetime_epoch < UNIX_EPOCH_OFFSET_SECONDS {
        return None;
    }
    let secs = secs_since_filetime_epoch - UNIX_EPOCH_OFFSET_SECONDS;
    Some(format_unix_seconds(secs))
}

/// Formats whole seconds since the Unix epoch as `YYYY-MM-DDTHH:MM:SSZ`.
///
/// The calendar date is computed with pure integer arithmetic over 400-year
/// Gregorian cycles ("eras"), using a shifted year that begins on March 1st
/// so that a leap day is always the last day of the shifted year.
fn format_unix_seconds(secs: u64) -> String {
    const SECS_PER_DAY: u64 = 86_400;
    // Days in one full 400-year Gregorian cycle.
    const DAYS_PER_ERA: i64 = 146_097;
    // Day count from 0000-03-01 (start of the shifted calendar) to 1970-01-01.
    const EPOCH_SHIFT_DAYS: i64 = 719_468;

    // Time-of-day part.
    let second_of_day = secs % SECS_PER_DAY;
    let hour = second_of_day / 3600;
    let minute = second_of_day / 60 % 60;
    let second = second_of_day % 60;

    // Date part: locate the day inside its 400-year era.
    let shifted_days = (secs / SECS_PER_DAY) as i64 + EPOCH_SHIFT_DAYS;
    let era = shifted_days.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted_days.rem_euclid(DAYS_PER_ERA) as u64;

    // Remove the leap days accumulated so far to get the year within the era.
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, ..., 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_based_month + 2) / 5 + 1;
    let month = if march_based_month < 10 {
        march_based_month + 3
    } else {
        march_based_month - 9
    };

    // January and February belong to the next civil year.
    let year = year_of_era as i64 + era * 400 + i64::from(month <= 2);

    format!("{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z", year, month, day, hour, minute, second)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Seconds between the FILETIME epoch and the Unix epoch.
    const EPOCH_GAP_SECS: u64 = 11_644_473_600;
    /// Number of 100 ns ticks in one second.
    const TICKS_PER_SECOND: u64 = 10_000_000;

    /// Builds the tick count that corresponds to `unix_secs` seconds after
    /// the Unix epoch.
    fn ticks_for_unix_secs(unix_secs: u64) -> u64 {
        (unix_secs + EPOCH_GAP_SECS) * TICKS_PER_SECOND
    }

    /// A zero tick count means "no timestamp".
    #[test]
    fn filetime_zero_is_none() {
        assert!(filetime_to_iso(0).is_none());
    }

    /// The tick count of the Unix epoch itself renders as 1970-01-01 midnight.
    #[test]
    fn filetime_unix_epoch_is_1970() {
        let rendered = filetime_to_iso(ticks_for_unix_secs(0));
        assert_eq!(rendered, Some(String::from("1970-01-01T00:00:00Z")));
    }

    /// A leap-day timestamp with a non-trivial time of day.
    #[test]
    fn filetime_known_date() {
        let rendered = filetime_to_iso(ticks_for_unix_secs(1_582_979_696));
        assert_eq!(rendered, Some(String::from("2020-02-29T12:34:56Z")));
    }
}