Skip to main content

jw_hwp_core/
shape.rs

//! Normalized CharShape / ParaShape and their DocInfo record parsers.
2
3use crate::error::Error;
4use serde::Serialize;
5use std::collections::HashMap;
6
7#[derive(Debug, Clone, Default, Serialize, PartialEq)]
8pub struct CharShape {
9    /// Font face ID per language (HWP has 7 lang slots: Hangul/English/Hanja/Japanese/Other/Symbol/User).
10    pub face_ids: [u16; 7],
11    /// Resolved font names per language slot (populated after FaceName parsing).
12    #[serde(default)]
13    pub face_names: [String; 7],
14    /// Base font size in points (HWP stores as pt*100, we expose as f32 points).
15    pub size_pt: f32,
16    pub italic: bool,
17    pub bold: bool,
18    pub underline: bool,
19    pub strikethrough: bool,
20    pub superscript: bool,
21    pub subscript: bool,
22    /// Text color as 0xBBGGRR (HWP COLORREF).
23    pub color: u32,
24}
25
26#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
27#[serde(rename_all = "snake_case")]
28pub enum Align {
29    Both,
30    Left,
31    Right,
32    Center,
33    Distributed,
34    Division,
35    Unknown,
36}
37
38#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
39pub struct ParaShape {
40    pub align: Align,
41    pub left_margin: i32,
42    pub right_margin: i32,
43    pub indent: i32,
44    pub space_before: i32,
45    pub space_after: i32,
46    /// Line spacing: legacy INT32 field for pre-5.0.2.5, extended UINT32 at offset 50 for newer docs.
47    pub line_spacing: i32,
48}
49
50#[derive(Debug, Clone, Default, Serialize, PartialEq)]
51pub struct ShapeTables {
52    /// char_shape_id → CharShape. IDs are implicit: the Nth HWPTAG_CHAR_SHAPE record in DocInfo has id N.
53    pub char_shapes: HashMap<u32, CharShape>,
54    pub para_shapes: HashMap<u32, ParaShape>,
55    pub faces: Vec<crate::faces::FaceName>,
56    pub styles: Vec<crate::styles::Style>,
57}
58
59pub fn parse_char_shape(p: &[u8]) -> Result<CharShape, Error> {
60    if p.len() < 50 {
61        return Err(Error::Record(format!("CharShape too short: {}", p.len())));
62    }
63    let mut face_ids = [0u16; 7];
64    for i in 0..7 {
65        face_ids[i] = u16::from_le_bytes(p[2 * i..2 * i + 2].try_into().unwrap());
66    }
67    let base_size = i32::from_le_bytes(p[42..46].try_into().unwrap());
68    let size_pt = base_size as f32 / 100.0;
69    let props = u32::from_le_bytes(p[46..50].try_into().unwrap());
70    let italic = props & 0x1 != 0;
71    let bold = props & 0x2 != 0;
72    // underline kind is bits 2..4; 0 = none, others = some underline
73    let underline = (props >> 2) & 0x3 != 0;
74    // strike kind bits 18..21 (spec表35 bit 18..20); non-zero means strikethrough
75    let strikethrough = (props >> 18) & 0x7 != 0;
76    let superscript = props & (1 << 15) != 0;
77    let subscript = props & (1 << 16) != 0;
78    let color = if p.len() >= 56 {
79        u32::from_le_bytes(p[52..56].try_into().unwrap())
80    } else {
81        0
82    };
83
84    Ok(CharShape {
85        face_ids,
86        face_names: Default::default(),
87        size_pt,
88        italic,
89        bold,
90        underline,
91        strikethrough,
92        superscript,
93        subscript,
94        color,
95    })
96}
97
98pub fn parse_para_shape(p: &[u8]) -> Result<ParaShape, Error> {
99    if p.len() < 42 {
100        return Err(Error::Record(format!("ParaShape too short: {}", p.len())));
101    }
102    let props1 = u32::from_le_bytes(p[0..4].try_into().unwrap());
103    let align = match (props1 >> 2) & 0x7 {
104        0 => Align::Both,
105        1 => Align::Left,
106        2 => Align::Right,
107        3 => Align::Center,
108        4 => Align::Distributed,
109        5 => Align::Division,
110        _ => Align::Unknown,
111    };
112    let left_margin = i32::from_le_bytes(p[4..8].try_into().unwrap());
113    let right_margin = i32::from_le_bytes(p[8..12].try_into().unwrap());
114    let indent = i32::from_le_bytes(p[12..16].try_into().unwrap());
115    let space_before = i32::from_le_bytes(p[16..20].try_into().unwrap());
116    let space_after = i32::from_le_bytes(p[20..24].try_into().unwrap());
117    let legacy_line_spacing = i32::from_le_bytes(p[24..28].try_into().unwrap());
118    let line_spacing = if p.len() >= 54 {
119        i32::from_le_bytes(p[50..54].try_into().unwrap())
120    } else {
121        legacy_line_spacing
122    };
123    Ok(ParaShape {
124        align,
125        left_margin,
126        right_margin,
127        indent,
128        space_before,
129        space_after,
130        line_spacing,
131    })
132}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a 72-byte CharShape payload with the given base size (1/100 pt), the
    /// italic/bold property bits, and a fixed text color of `0x00112233`.
    fn build_char_shape(base_size: i32, italic: bool, bold: bool) -> Vec<u8> {
        let mut p = vec![0u8; 72];
        p[42..46].copy_from_slice(&base_size.to_le_bytes());
        let mut props: u32 = 0;
        if italic {
            props |= 1;
        }
        if bold {
            props |= 2;
        }
        p[46..50].copy_from_slice(&props.to_le_bytes());
        p[52..56].copy_from_slice(&0x00112233u32.to_le_bytes());
        p
    }

    #[test]
    fn parses_bold_italic_char_shape() {
        let p = build_char_shape(1200, true, true);
        let cs = parse_char_shape(&p).unwrap();
        assert_eq!(cs.size_pt, 12.0);
        assert!(cs.italic);
        assert!(cs.bold);
        assert!(!cs.underline);
        assert_eq!(cs.color, 0x00112233);
    }

    #[test]
    fn rejects_short_char_shape() {
        assert!(parse_char_shape(&[0u8; 49]).is_err());
    }

    #[test]
    fn char_shape_color_defaults_to_zero_when_absent() {
        let p = build_char_shape(1000, false, false);
        // Cut the payload right after the property bit field.
        let cs = parse_char_shape(&p[..50]).unwrap();
        assert_eq!(cs.color, 0);
    }

    #[test]
    fn parses_para_shape_alignment() {
        let mut p = vec![0u8; 54];
        // align = right = 2, stored in bits 2, 3 and 4 of the first property word
        let props1: u32 = 2 << 2;
        p[0..4].copy_from_slice(&props1.to_le_bytes());
        p[4..8].copy_from_slice(&100i32.to_le_bytes());
        p[12..16].copy_from_slice(&200i32.to_le_bytes());
        let ps = parse_para_shape(&p).unwrap();
        assert_eq!(ps.align, Align::Right);
        assert_eq!(ps.left_margin, 100);
        assert_eq!(ps.indent, 200);
    }

    #[test]
    fn rejects_short_para_shape() {
        assert!(parse_para_shape(&[0u8; 41]).is_err());
    }

    #[test]
    fn para_shape_line_spacing_prefers_extended_field() {
        let mut p = vec![0u8; 54];
        p[24..28].copy_from_slice(&160i32.to_le_bytes());
        p[50..54].copy_from_slice(&180i32.to_le_bytes());
        assert_eq!(parse_para_shape(&p).unwrap().line_spacing, 180);
        // Without the extended field the legacy value is used.
        assert_eq!(parse_para_shape(&p[..42]).unwrap().line_spacing, 160);
    }
}