jw-hwp-core 0.1.2

Read-only parser for Hancom HWP 5.0 (binary CFB) and HWPX (OWPML) documents
Documentation
//! HWP table control parsing: HWPTAG_TABLE payload + per-cell LIST_HEADER.

use crate::error::Error;

/// A table extracted from a document, in serializable form.
///
/// NOTE(review): this struct is constructed outside this part of the file; the
/// field descriptions below that go beyond the declared types are assumptions
/// to be confirmed against the code that builds it.
#[derive(Debug, Clone, serde::Serialize, PartialEq, Eq)]
pub struct Table {
    /// Identifier of the table (format not visible here — TODO confirm).
    pub id: String,
    /// Number of rows; presumably the value read from the HWPTAG_TABLE payload.
    pub rows: u16,
    /// Number of columns; presumably the value read from the HWPTAG_TABLE payload.
    pub cols: u16,
    /// Optional caption text; `None` when the table has no caption.
    pub caption: Option<String>,
    /// Two-level grid of optional cells.
    /// Presumably indexed `[row][col]`, with `None` for positions that hold no
    /// cell of their own (e.g. covered by a merged cell) — verify against the builder.
    pub cells: Vec<Vec<Option<Cell>>>,
}

/// A single table cell in serializable form.
///
/// The position and span fields have the same names and types as those of
/// `CellListHeader`, which is parsed from the cell's LIST_HEADER record.
#[derive(Debug, Clone, serde::Serialize, PartialEq, Eq)]
pub struct Cell {
    /// Column address of the cell (presumably zero-based — TODO confirm).
    pub col: u16,
    /// Row address of the cell (presumably zero-based — TODO confirm).
    pub row: u16,
    /// Number of columns the cell spans.
    pub col_span: u16,
    /// Number of rows the cell spans.
    pub row_span: u16,
    /// Text content of the cell.
    /// Presumably the cell's paragraphs joined together — verify against the builder.
    pub text: String,
    /// Text of the cell's paragraphs, one entry each (assumed; built elsewhere).
    pub paragraphs: Vec<String>,
}

/// Table dimensions decoded from an HWPTAG_TABLE record payload.
///
/// Produced by `parse_table_payload`; only the row and column counts are kept.
#[derive(Debug, Clone)]
pub(crate) struct TableProps {
    /// Row count: little-endian `u16` read from payload bytes 4..6.
    pub rows: u16,
    /// Column count: little-endian `u16` read from payload bytes 6..8.
    pub cols: u16,
}

pub(crate) fn parse_table_payload(p: &[u8]) -> Result<TableProps, Error> {
    if p.len() < 22 {
        return Err(Error::Record(format!(
            "HWPTAG_TABLE too short: {}",
            p.len()
        )));
    }
    let rows = u16::from_le_bytes(p[4..6].try_into().unwrap());
    let cols = u16::from_le_bytes(p[6..8].try_into().unwrap());
    // Sanity: full length should be 22 + 2*rows + 10*zones; we don't need zones here.
    Ok(TableProps { rows, cols })
}

/// Fields decoded from a table cell's LIST_HEADER record payload.
///
/// Produced by `parse_cell_list_header`. All values are read as little-endian
/// integers at fixed byte offsets of the payload.
#[derive(Debug, Clone)]
pub(crate) struct CellListHeader {
    /// Paragraph count: signed 16-bit value at payload bytes 0..2.
    pub para_count: i16,
    /// Column address of the cell: payload bytes 8..10.
    pub col: u16,
    /// Row address of the cell: payload bytes 10..12.
    pub row: u16,
    /// Column span of the cell: payload bytes 12..14.
    pub col_span: u16,
    /// Row span of the cell: payload bytes 14..16.
    pub row_span: u16,
}

pub(crate) fn parse_cell_list_header(p: &[u8]) -> Result<CellListHeader, Error> {
    if p.len() < 34 {
        return Err(Error::Record(format!(
            "cell LIST_HEADER too short: {}",
            p.len()
        )));
    }
    // Base LIST_HEADER = 8 bytes (INT16 para_count, UINT32 props, UINT16 unknown/list_id),
    // followed by 26 bytes of cell metadata.
    let para_count = i16::from_le_bytes(p[0..2].try_into().unwrap());
    let col = u16::from_le_bytes(p[8..10].try_into().unwrap());
    let row = u16::from_le_bytes(p[10..12].try_into().unwrap());
    let col_span = u16::from_le_bytes(p[12..14].try_into().unwrap());
    let row_span = u16::from_le_bytes(p[14..16].try_into().unwrap());
    Ok(CellListHeader {
        para_count,
        col,
        row,
        col_span,
        row_span,
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a synthetic HWPTAG_TABLE payload with zeroed properties, one
    /// 16-bit entry per row and `zones` ten-byte zone records.
    ///
    /// The resulting length is `22 + 2 * rows + 10 * zones` bytes.
    fn build_table_props(rows: u16, cols: u16, zones: u16) -> Vec<u8> {
        let mut p = Vec::new();
        p.extend_from_slice(&0u32.to_le_bytes()); // props
        p.extend_from_slice(&rows.to_le_bytes());
        p.extend_from_slice(&cols.to_le_bytes());
        p.extend_from_slice(&0u16.to_le_bytes()); // cell_spacing
        p.extend_from_slice(&[0u8; 8]); // inner margins
        for _ in 0..rows {
            p.extend_from_slice(&1000u16.to_le_bytes());
        }
        p.extend_from_slice(&0u16.to_le_bytes()); // border_fill_id
        p.extend_from_slice(&zones.to_le_bytes());
        for _ in 0..zones {
            p.extend_from_slice(&[0u8; 10]);
        }
        p
    }

    /// Builds a 34-byte cell LIST_HEADER payload: the 8-byte base header
    /// followed by 26 bytes of cell metadata, with unused fields zeroed.
    fn build_cell_list_header(
        para_count: i16,
        col: u16,
        row: u16,
        col_span: u16,
        row_span: u16,
    ) -> Vec<u8> {
        let mut p = Vec::new();
        p.extend_from_slice(&para_count.to_le_bytes());
        p.extend_from_slice(&[0u8; 4]); // props
        p.extend_from_slice(&[0u8; 2]); // unknown/list_id
        p.extend_from_slice(&col.to_le_bytes());
        p.extend_from_slice(&row.to_le_bytes());
        p.extend_from_slice(&col_span.to_le_bytes());
        p.extend_from_slice(&row_span.to_le_bytes());
        p.extend_from_slice(&[0u8; 18]); // rest of cell meta
        p
    }

    #[test]
    fn parses_2x3_no_zones() {
        let p = build_table_props(2, 3, 0);
        assert_eq!(p.len(), 22 + 4);
        let tp = parse_table_payload(&p).unwrap();
        assert_eq!((tp.rows, tp.cols), (2, 3));
    }

    #[test]
    fn parses_with_zones() {
        let p = build_table_props(3, 1, 1);
        assert_eq!(p.len(), 22 + 6 + 10);
        let tp = parse_table_payload(&p).unwrap();
        assert_eq!((tp.rows, tp.cols), (3, 1));
    }

    #[test]
    fn rejects_truncated() {
        let p = vec![0u8; 10];
        assert!(parse_table_payload(&p).is_err());
    }

    /// Boundary: exactly 22 bytes is the shortest payload that is accepted.
    #[test]
    fn accepts_minimum_length_table_payload() {
        let p = build_table_props(0, 0, 0);
        assert_eq!(p.len(), 22);
        let tp = parse_table_payload(&p).unwrap();
        assert_eq!((tp.rows, tp.cols), (0, 0));
    }

    /// Boundary: one byte below the minimum must be rejected, as must empty input.
    #[test]
    fn rejects_table_payload_one_byte_short() {
        assert!(parse_table_payload(&[0u8; 21]).is_err());
        assert!(parse_table_payload(&[]).is_err());
    }

    #[test]
    fn parses_cell_list_header() {
        let p = build_cell_list_header(1, 2, 1, 1, 1);
        assert_eq!(p.len(), 34);
        let h = parse_cell_list_header(&p).unwrap();
        assert_eq!((h.col, h.row, h.col_span, h.row_span), (2, 1, 1, 1));
        assert_eq!(h.para_count, 1);
    }

    /// Each field must come from its own offset; distinct values catch swaps.
    #[test]
    fn parses_cell_list_header_distinct_fields() {
        let p = build_cell_list_header(-1, 0x0102, 0x0304, 0x0506, 0x0708);
        let h = parse_cell_list_header(&p).unwrap();
        assert_eq!(h.para_count, -1); // paragraph count is decoded as signed
        assert_eq!(
            (h.col, h.row, h.col_span, h.row_span),
            (0x0102, 0x0304, 0x0506, 0x0708)
        );
    }

    /// Boundary: anything shorter than 34 bytes must be rejected.
    #[test]
    fn rejects_truncated_cell_list_header() {
        let full = build_cell_list_header(1, 0, 0, 1, 1);
        assert!(parse_cell_list_header(&full[..33]).is_err());
        assert!(parse_cell_list_header(&full[..8]).is_err());
        assert!(parse_cell_list_header(&[]).is_err());
    }
}