jw-hwp-core 0.1.2

Read-only parser for Hancom HWP 5.0 (binary CFB) and HWPX (OWPML) documents
Documentation
//! Parser for HWPTAG_STYLE (DocInfo-level named styles).

use crate::error::Error;

/// A named style entry decoded from a single HWPTAG_STYLE record by `parse_style`.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize)]
pub struct Style {
    /// Style name taken from the first length-prefixed UTF-16 string in the record.
    pub local_name: String,
    /// Style name taken from the second length-prefixed UTF-16 string in the record.
    pub english_name: String,
    /// Paragraph-shape reference read from the record's trailing `u16` pair
    /// (presumably an index into the DocInfo paragraph-shape list — confirm against callers).
    pub para_shape_id: u16,
    /// Character-shape reference read from the record's trailing `u16` pair
    /// (presumably an index into the DocInfo character-shape list — confirm against callers).
    pub char_shape_id: u16,
}

pub fn parse_style(p: &[u8]) -> Result<Style, Error> {
    let mut cur = 0usize;
    let local_name =
        read_wstr(p, &mut cur).ok_or_else(|| Error::Record("Style: local_name".into()))?;
    let english_name = read_wstr(p, &mut cur).unwrap_or_default();
    if cur + 4 > p.len() {
        return Err(Error::Record("Style: truncated after names".into()));
    }
    cur += 2; // 1B properties + 1B next_style_id
    cur += 2; // 2B lang_id
    if cur + 4 > p.len() {
        return Err(Error::Record("Style: truncated shape refs".into()));
    }
    let char_shape_id = u16::from_le_bytes(p[cur..cur + 2].try_into().unwrap());
    cur += 2;
    let para_shape_id = u16::from_le_bytes(p[cur..cur + 2].try_into().unwrap());
    Ok(Style {
        local_name,
        english_name,
        para_shape_id,
        char_shape_id,
    })
}

/// Reads a length-prefixed UTF-16LE string from `p`, starting at byte offset `*cur`.
///
/// The encoding is a little-endian `u16` count of UTF-16 code units followed by that
/// many little-endian code units. Unpaired surrogates are replaced with U+FFFD rather
/// than rejected.
///
/// On success `*cur` is advanced to the first byte after the string. When the prefix
/// or the payload would run past the end of `p`, `None` is returned and `*cur` is left
/// exactly where it was, so the caller never sees a half-consumed field.
fn read_wstr(p: &[u8], cur: &mut usize) -> Option<String> {
    let prefix_at = *cur;
    let payload_at = prefix_at.checked_add(2)?;
    let prefix = p.get(prefix_at..payload_at)?;
    let unit_count = usize::from(u16::from_le_bytes([prefix[0], prefix[1]]));

    let payload_end = payload_at.checked_add(unit_count.checked_mul(2)?)?;
    let units: Vec<u16> = p
        .get(payload_at..payload_end)?
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .collect();

    // Commit the cursor only once every bounds check has passed.
    *cur = payload_end;
    Some(String::from_utf16_lossy(&units))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `text` the way the record stores names: a little-endian `u16` count of
    /// UTF-16 code units, then the code units themselves, also little-endian.
    fn encode_wstr(text: &str) -> Vec<u8> {
        let units: Vec<u16> = text.encode_utf16().collect();
        let mut bytes = Vec::with_capacity(2 + units.len() * 2);
        bytes.extend_from_slice(&(units.len() as u16).to_le_bytes());
        for unit in &units {
            bytes.extend_from_slice(&unit.to_le_bytes());
        }
        bytes
    }

    #[test]
    fn parses_minimal_style() {
        // Two names ("본문" and an empty English name) followed by the
        // 1 + 1 + 2 + 2 + 2 byte fixed-size tail.
        let mut record = encode_wstr("본문");
        record.extend_from_slice(&encode_wstr(""));
        record.extend_from_slice(&[0, 0]); // properties, next_style_id
        record.extend_from_slice(&0u16.to_le_bytes()); // lang_id
        record.extend_from_slice(&3u16.to_le_bytes()); // char_shape_id
        record.extend_from_slice(&5u16.to_le_bytes()); // para_shape_id

        let style = parse_style(&record).unwrap();

        assert_eq!(style.local_name, "본문");
        assert_eq!(style.char_shape_id, 3);
        assert_eq!(style.para_shape_id, 5);
    }
}