jw-hwp-core 0.1.1

Read-only parser for Hancom HWP 5.0 (binary CFB) and HWPX (OWPML) documents
Documentation
//! Parser for HWPTAG_FACE_NAME (DocInfo-level font face definitions).

use crate::error::Error;

/// One font face definition, as produced by [`parse_face_name`].
///
/// Only `name` is mandatory; the optional fields are `None` when the record's
/// property byte does not announce them or when they could not be read.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize)]
pub struct FaceName {
    /// Font face name; the first string in the payload, right after the property byte.
    pub name: String,
    /// Substitute font name (if document stored one).
    pub substitute: Option<String>,
    /// Base font name (if document stored one).
    pub base: Option<String>,
}

pub fn parse_face_name(p: &[u8]) -> Result<FaceName, Error> {
    if p.is_empty() {
        return Err(Error::Record("FaceName: empty payload".into()));
    }
    let mut cur = 0usize;
    let props = p[cur];
    cur += 1;
    let has_substitute = props & 0x80 != 0;
    let has_type_info = props & 0x40 != 0;
    let has_base = props & 0x20 != 0;

    // name (always first after props)
    let name = read_wstr(p, &mut cur).ok_or_else(|| Error::Record("FaceName: name".into()))?;

    let mut substitute = None;
    if has_substitute {
        if cur >= p.len() {
            return Ok(FaceName {
                name,
                substitute,
                base: None,
            });
        }
        cur += 1; // substitute type byte
        substitute = read_wstr(p, &mut cur);
    }
    if has_type_info {
        cur = cur.saturating_add(10); // 10 bytes type_info, skipped
    }
    let base = if has_base {
        read_wstr(p, &mut cur)
    } else {
        None
    };
    Ok(FaceName {
        name,
        substitute,
        base,
    })
}

/// Reads a length-prefixed UTF-16LE string from `p` starting at `*cur`.
///
/// The encoding is a little-endian `u16` count of UTF-16 code units followed by
/// that many little-endian code units. Code units that do not form valid
/// UTF-16 are replaced with U+FFFD.
///
/// On success `*cur` is moved just past the string. On failure (the length
/// prefix or the string body does not fit inside `p`) `None` is returned and
/// `*cur` is left untouched, so callers never end up in the middle of a field.
fn read_wstr(p: &[u8], cur: &mut usize) -> Option<String> {
    let start = *cur;
    // Checked arithmetic: an out-of-range cursor must yield `None`, not a panic.
    let body_start = start.checked_add(2)?;
    let prefix = p.get(start..body_start)?;
    let len = usize::from(u16::from_le_bytes([prefix[0], prefix[1]]));

    let body_end = body_start.checked_add(len.checked_mul(2)?)?;
    let body = p.get(body_start..body_end)?;

    let units: Vec<u16> = body
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .collect();

    // Commit the cursor only once the whole string is known to be readable.
    *cur = body_end;
    Some(String::from_utf16_lossy(&units))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal payload: a zero property byte (no substitute, no type
    /// info, no base), then the UTF-16 unit count as a little-endian `u16`,
    /// then the name itself as UTF-16LE.
    fn encode(name: &str) -> Vec<u8> {
        let unit_count = name.encode_utf16().count() as u16;
        let mut payload = vec![0u8];
        payload.extend_from_slice(&unit_count.to_le_bytes());
        for unit in name.encode_utf16() {
            payload.extend_from_slice(&unit.to_le_bytes());
        }
        payload
    }

    /// A name-only record yields the name and no substitute.
    #[test]
    fn parses_plain_name() {
        let payload = encode("바탕");
        let face = parse_face_name(&payload).unwrap();
        assert_eq!(face.name, "바탕");
        assert_eq!(face.substitute, None);
    }

    /// An empty payload has no property byte and must be rejected.
    #[test]
    fn rejects_empty() {
        let outcome = parse_face_name(&[]);
        assert!(outcome.is_err());
    }
}