// hwpers 0.5.0
//
// A Rust library for parsing Korean Hangul Word Processor (HWP) files with full layout rendering support.
// Documentation
use crate::error::{HwpError, Result};

/// Document metadata extracted from an OLE summary-information property set.
///
/// Every field is optional: a field stays `None` when the corresponding
/// property is absent from the stream, has an unexpected value type, or
/// cannot be read within the bounds of the input.
#[derive(Debug, Clone, Default)]
pub struct SummaryInfo {
    /// Document title (property ID `0x02`, string).
    pub title: Option<String>,
    /// Document subject (property ID `0x03`, string).
    pub subject: Option<String>,
    /// Document author (property ID `0x04`, string).
    pub author: Option<String>,
    /// Keywords (property ID `0x05`, string).
    pub keywords: Option<String>,
    /// Free-form comments (property ID `0x06`, string).
    pub comments: Option<String>,
    /// Name of the user who last saved the document (property ID `0x08`, string).
    pub last_saved_by: Option<String>,
    /// Revision number, kept as the string stored in the file (property ID `0x09`).
    pub revision_number: Option<String>,
    /// Creation timestamp as the raw 64-bit FILETIME value (property ID `0x0C`).
    /// The value is not converted to Unix time.
    pub creation_date: Option<i64>,
    /// Last-saved timestamp as the raw 64-bit FILETIME value (property ID `0x0D`).
    /// The value is not converted to Unix time.
    pub last_saved_date: Option<i64>,
    /// Page count (property ID `0x0E`, 32-bit signed integer).
    pub page_count: Option<i32>,
    /// Word count (property ID `0x0F`, 32-bit signed integer).
    pub word_count: Option<i32>,
    /// Character count (property ID `0x10`, 32-bit signed integer).
    pub char_count: Option<i32>,
}

// Minimum input length `SummaryInfo::from_bytes` accepts before it reports a
// parse error. Presumably the fixed OLE property-set header (byte order,
// version, system id, CLSID, set count) — confirm against MS-OLEPS.
const PROPERTY_SET_HEADER_SIZE: usize = 28;

// Property identifiers: each one selects the `SummaryInfo` field that a
// property-table entry is stored into.
const PROPERTY_ID_TITLE: u32 = 0x02;
const PROPERTY_ID_SUBJECT: u32 = 0x03;
const PROPERTY_ID_AUTHOR: u32 = 0x04;
const PROPERTY_ID_KEYWORDS: u32 = 0x05;
const PROPERTY_ID_COMMENTS: u32 = 0x06;
const PROPERTY_ID_LAST_SAVED_BY: u32 = 0x08;
const PROPERTY_ID_REVISION_NUMBER: u32 = 0x09;
const PROPERTY_ID_CREATION_DATE: u32 = 0x0C;
const PROPERTY_ID_LAST_SAVED_DATE: u32 = 0x0D;
const PROPERTY_ID_PAGE_COUNT: u32 = 0x0E;
const PROPERTY_ID_WORD_COUNT: u32 = 0x0F;
const PROPERTY_ID_CHAR_COUNT: u32 = 0x10;

// Value type tags stored in the first four bytes of each property value.
// Only these three are decoded; any other tag leaves the field as `None`.
const VT_LPSTR: u32 = 0x1E; // length-prefixed byte string
const VT_FILETIME: u32 = 0x40; // 64-bit timestamp stored as two little-endian u32 halves
const VT_I4: u32 = 0x03; // 32-bit signed integer

impl SummaryInfo {
    /// Parses an OLE summary-information property-set stream.
    ///
    /// Parsing is best effort: once the header has been validated, any
    /// property whose table entry or value lies outside `data` is skipped
    /// rather than reported, and the fields gathered so far are returned.
    ///
    /// All offsets and lengths in the stream are untrusted, so every
    /// address computation uses checked arithmetic and bounds-checked
    /// slicing; malformed input can never cause a panic.
    ///
    /// # Errors
    ///
    /// Returns `HwpError::ParseError` when `data` is shorter than
    /// `PROPERTY_SET_HEADER_SIZE` or does not start with the `0xFFFE`
    /// byte-order mark.
    pub fn from_bytes(data: &[u8]) -> Result<Self> {
        // Position of the first section's offset field: it follows the
        // 28-byte header and the section's 16-byte format identifier.
        const SECTION_OFFSET_FIELD: usize = 44;
        // One property-table entry: u32 property ID + u32 value offset.
        const PROPERTY_ENTRY_SIZE: usize = 8;
        // Smallest value the parser accepts: u32 type tag + 4 payload bytes.
        const MIN_VALUE_SIZE: usize = 8;

        if data.len() < PROPERTY_SET_HEADER_SIZE {
            return Err(HwpError::ParseError(
                "SummaryInfo data too short".to_string(),
            ));
        }

        let byte_order = u16::from_le_bytes([data[0], data[1]]);
        if byte_order != 0xFFFE {
            return Err(HwpError::ParseError(format!(
                "Invalid OLE property byte order: 0x{:04X}",
                byte_order
            )));
        }

        let mut info = SummaryInfo::default();

        // A valid header without a section is not an error; it simply
        // carries no metadata.
        let section_offset = match Self::read_u32_at(data, SECTION_OFFSET_FIELD) {
            Some(raw) => raw as usize,
            None => return Ok(info),
        };

        // The section starts with a u32 size (unused here) followed by the
        // u32 property count.
        let count_offset = match section_offset.checked_add(4) {
            Some(offset) => offset,
            None => return Ok(info),
        };
        let property_count = match Self::read_u32_at(data, count_offset) {
            Some(raw) => raw as usize,
            None => return Ok(info),
        };

        // `read_u32_at` has just proven `count_offset + 4 <= data.len()`,
        // so this addition cannot overflow and the slice cannot panic.
        let table = &data[count_offset + 4..];

        // `chunks_exact` stops at the first entry that does not fully fit,
        // and `take` caps the walk at the declared count, so a bogus huge
        // count costs nothing.
        for entry in table.chunks_exact(PROPERTY_ENTRY_SIZE).take(property_count) {
            let prop_id = u32::from_le_bytes([entry[0], entry[1], entry[2], entry[3]]);
            let value_rel = u32::from_le_bytes([entry[4], entry[5], entry[6], entry[7]]) as usize;

            // Value offsets are relative to the start of the section.
            let located = section_offset
                .checked_add(value_rel)
                .filter(|&start| {
                    start
                        .checked_add(MIN_VALUE_SIZE)
                        .map_or(false, |end| end <= data.len())
                })
                .and_then(|start| Self::read_u32_at(data, start).map(|ty| (start, ty)));
            let (absolute_offset, prop_type) = match located {
                Some(pair) => pair,
                // Skip just this property; later entries may still be valid.
                None => continue,
            };

            match prop_id {
                PROPERTY_ID_TITLE => {
                    info.title = Self::read_string_property(data, absolute_offset, prop_type);
                }
                PROPERTY_ID_SUBJECT => {
                    info.subject = Self::read_string_property(data, absolute_offset, prop_type);
                }
                PROPERTY_ID_AUTHOR => {
                    info.author = Self::read_string_property(data, absolute_offset, prop_type);
                }
                PROPERTY_ID_KEYWORDS => {
                    info.keywords = Self::read_string_property(data, absolute_offset, prop_type);
                }
                PROPERTY_ID_COMMENTS => {
                    info.comments = Self::read_string_property(data, absolute_offset, prop_type);
                }
                PROPERTY_ID_LAST_SAVED_BY => {
                    info.last_saved_by =
                        Self::read_string_property(data, absolute_offset, prop_type);
                }
                PROPERTY_ID_REVISION_NUMBER => {
                    info.revision_number =
                        Self::read_string_property(data, absolute_offset, prop_type);
                }
                PROPERTY_ID_CREATION_DATE => {
                    info.creation_date =
                        Self::read_filetime_property(data, absolute_offset, prop_type);
                }
                PROPERTY_ID_LAST_SAVED_DATE => {
                    info.last_saved_date =
                        Self::read_filetime_property(data, absolute_offset, prop_type);
                }
                PROPERTY_ID_PAGE_COUNT => {
                    info.page_count = Self::read_i32_property(data, absolute_offset, prop_type);
                }
                PROPERTY_ID_WORD_COUNT => {
                    info.word_count = Self::read_i32_property(data, absolute_offset, prop_type);
                }
                PROPERTY_ID_CHAR_COUNT => {
                    info.char_count = Self::read_i32_property(data, absolute_offset, prop_type);
                }
                _ => {}
            }
        }

        Ok(info)
    }

    /// Reads a little-endian `u32` at `offset`, or `None` when the four
    /// bytes are not fully inside `data` (including when `offset + 4`
    /// would overflow `usize`).
    fn read_u32_at(data: &[u8], offset: usize) -> Option<u32> {
        let end = offset.checked_add(4)?;
        let bytes = data.get(offset..end)?;
        Some(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
    }

    /// Decodes a `VT_LPSTR` value located at `offset` (the position of its
    /// type tag).
    ///
    /// Layout: u32 type tag, u32 byte length, then the bytes. The text is
    /// cut at the first NUL and must be valid UTF-8.
    ///
    /// Returns `None` when `prop_type` is not `VT_LPSTR`, the length is
    /// zero, the bytes run past the end of `data`, or the text is not
    /// valid UTF-8.
    ///
    /// NOTE(review): the property set's code page is not consulted and
    /// wide-string values are not handled, so strings stored in a legacy
    /// code page or as UTF-16 come back as `None` — confirm which encodings
    /// real HWP writers emit.
    fn read_string_property(data: &[u8], offset: usize, prop_type: u32) -> Option<String> {
        if prop_type != VT_LPSTR {
            return None;
        }

        let len_offset = offset.checked_add(4)?;
        let str_len = Self::read_u32_at(data, len_offset)? as usize;
        if str_len == 0 {
            return None;
        }

        let start = len_offset.checked_add(4)?;
        let end = start.checked_add(str_len)?;
        let raw = data.get(start..end)?;

        // The declared length normally includes a terminating NUL (and
        // possibly padding); keep only what precedes the first NUL.
        let text_len = raw.iter().position(|&b| b == 0).unwrap_or(raw.len());
        std::str::from_utf8(&raw[..text_len]).ok().map(str::to_owned)
    }

    /// Decodes a `VT_FILETIME` value located at `offset` (the position of
    /// its type tag).
    ///
    /// The timestamp is stored as two little-endian `u32` halves, low half
    /// first; they are combined into the raw 64-bit value without any
    /// epoch conversion.
    ///
    /// Returns `None` when `prop_type` is not `VT_FILETIME` or the eight
    /// payload bytes are not fully inside `data`.
    fn read_filetime_property(data: &[u8], offset: usize, prop_type: u32) -> Option<i64> {
        if prop_type != VT_FILETIME {
            return None;
        }

        let low = i64::from(Self::read_u32_at(data, offset.checked_add(4)?)?);
        let high = i64::from(Self::read_u32_at(data, offset.checked_add(8)?)?);

        Some((high << 32) | low)
    }

    /// Decodes a `VT_I4` value located at `offset` (the position of its
    /// type tag).
    ///
    /// Returns `None` when `prop_type` is not `VT_I4` or the four payload
    /// bytes are not fully inside `data`.
    fn read_i32_property(data: &[u8], offset: usize, prop_type: u32) -> Option<i32> {
        if prop_type != VT_I4 {
            return None;
        }

        // Bit-for-bit reinterpretation of the little-endian payload as signed.
        Self::read_u32_at(data, offset.checked_add(4)?).map(|raw| raw as i32)
    }

    /// Reports whether any of the core descriptive fields (title, author,
    /// subject, keywords) is present.
    ///
    /// Other fields such as comments, dates and counts are deliberately
    /// not considered.
    pub fn has_metadata(&self) -> bool {
        [&self.title, &self.author, &self.subject, &self.keywords]
            .iter()
            .any(|field| field.is_some())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input is shorter than the fixed header and must be rejected.
    #[test]
    fn test_empty_summary_info() {
        let result = SummaryInfo::from_bytes(&[]);
        assert!(result.is_err());
    }

    /// A full-length header with a wrong byte-order mark must be rejected
    /// by the byte-order check itself, not by the length guard.
    #[test]
    fn test_invalid_byte_order() {
        // Long enough to pass the length guard, so the byte-order check is
        // what actually fails.
        let data = vec![0u8; PROPERTY_SET_HEADER_SIZE];
        match SummaryInfo::from_bytes(&data) {
            Err(HwpError::ParseError(msg)) => {
                assert!(msg.contains("byte order"), "unexpected message: {}", msg);
            }
            _ => panic!("expected a byte-order parse error"),
        }
    }

    /// A valid header followed by zeroed bytes parses but yields no metadata.
    #[test]
    fn test_minimal_valid_header() {
        let mut data = vec![0xFE, 0xFF];
        data.extend(vec![0u8; 46]);
        let result = SummaryInfo::from_bytes(&data);
        assert!(result.is_ok());
        assert!(!result.unwrap().has_metadata());
    }

    /// A stream with one well-formed string property populates that field.
    #[test]
    fn test_parses_title_property() {
        let mut data = vec![0xFE, 0xFF]; // byte-order mark
        data.extend(vec![0u8; 42]); // rest of header + format id (not inspected)
        data.extend_from_slice(&48u32.to_le_bytes()); // section offset (bytes 44..48)
        data.extend_from_slice(&0u32.to_le_bytes()); // section size (not inspected)
        data.extend_from_slice(&1u32.to_le_bytes()); // property count
        data.extend_from_slice(&PROPERTY_ID_TITLE.to_le_bytes()); // table entry: id
        data.extend_from_slice(&16u32.to_le_bytes()); // table entry: value offset in section
        data.extend_from_slice(&VT_LPSTR.to_le_bytes()); // value: type tag
        data.extend_from_slice(&6u32.to_le_bytes()); // value: byte length incl. NUL
        data.extend_from_slice(b"Hello\0"); // value: bytes

        let info = SummaryInfo::from_bytes(&data).unwrap();
        assert_eq!(info.title.as_deref(), Some("Hello"));
        assert!(info.author.is_none());
        assert!(info.page_count.is_none());
        assert!(info.has_metadata());
    }
}