Skip to main content

jw_hwp_core/
structure.rs

1//! Per-paragraph classification: text vs. container control (table, image, ...).
2
3use serde::Serialize;
4
5#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
6#[serde(rename_all = "snake_case")]
7pub enum NodeKind {
8    Paragraph,
9    Table,
10    Image,
11    Equation,
12    Header,
13    Footer,
14    Footnote,
15    Endnote,
16    SectionDefinition,
17    ColumnDefinition,
18    Field,
19    Unsupported,
20}
21
22#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
23pub struct StructureNode {
24    /// Stable ID shaped like `"{section}:{paragraph}"`.
25    pub id: String,
26    pub kind: NodeKind,
27    pub preview: String,
28    /// Raw ASCII FourCC of the first control header in the paragraph, if any.
29    pub ctrl_id: Option<String>,
30}
31
32/// Map an HWP control-header ASCII FourCC to a NodeKind.
33///
34/// Input is 4 ASCII bytes in file order (e.g. `[b't', b'b', b'l', b' ']`).
35pub fn classify(ctrl: [u8; 4]) -> NodeKind {
36    match &ctrl {
37        b"tbl " => NodeKind::Table,
38        b"gso " => NodeKind::Image,
39        b"eqed" => NodeKind::Equation,
40        b"hedd" => NodeKind::Header,
41        b"foot" => NodeKind::Footer,
42        b"%fn " => NodeKind::Footnote,
43        b"%en " => NodeKind::Endnote,
44        b"secd" => NodeKind::SectionDefinition,
45        b"cold" => NodeKind::ColumnDefinition,
46        c if c[0] == b'%' => NodeKind::Field,
47        _ => NodeKind::Unsupported,
48    }
49}
50
/// Build a cheap one-line preview of `text`.
///
/// Returns at most the first 60 Unicode scalar values of `text`, with every
/// `'\n'` replaced by a space. Other characters (including `'\r'`) are passed
/// through unchanged. Text shorter than the limit is returned in full.
pub fn make_preview(text: &str) -> String {
    /// Maximum number of Unicode scalar values kept in a preview.
    const PREVIEW_CHARS: usize = 60;

    // Truncate before mapping: the replacement is one-to-one per character, so
    // the result is the same, but only the kept prefix is ever visited and no
    // intermediate full-length string is allocated.
    text.chars()
        .take(PREVIEW_CHARS)
        .map(|ch| if ch == '\n' { ' ' } else { ch })
        .collect()
}
59
#[cfg(test)]
mod tests {
    use super::*;

    /// Each listed control ID must map to the expected kind.
    #[test]
    fn classifies_known_ctrl_ids() {
        let cases = [
            (*b"tbl ", NodeKind::Table),
            (*b"gso ", NodeKind::Image),
            (*b"eqed", NodeKind::Equation),
            (*b"foot", NodeKind::Footer),
            (*b"secd", NodeKind::SectionDefinition),
            (*b"cold", NodeKind::ColumnDefinition),
            (*b"%fn ", NodeKind::Footnote),
            (*b"%xy ", NodeKind::Field),
            (*b"xxxx", NodeKind::Unsupported),
        ];
        for (ctrl, expected) in cases {
            assert_eq!(
                classify(ctrl),
                expected,
                "ctrl id {:?}",
                String::from_utf8_lossy(&ctrl)
            );
        }
    }

    /// Long input is cut to 60 characters and newlines become spaces.
    #[test]
    fn preview_trims_to_60_and_flattens_newlines() {
        let long = "a\nb".repeat(100);
        let p = make_preview(&long);
        assert_eq!(p.chars().count(), 60);
        assert!(!p.contains('\n'));
        // Pin the exact content, not just its length.
        assert_eq!(p, "a b".repeat(20));
    }

    /// Input at or below the limit comes back whole.
    #[test]
    fn preview_keeps_short_text() {
        assert_eq!(make_preview(""), "");
        assert_eq!(make_preview("hello"), "hello");
        assert_eq!(make_preview("one\ntwo"), "one two");
    }

    /// The limit counts Unicode scalar values, not bytes.
    #[test]
    fn preview_counts_chars_not_bytes() {
        let wide = "한".repeat(100);
        let p = make_preview(&wide);
        assert_eq!(p.chars().count(), 60);
        assert_eq!(p, "한".repeat(60));
    }
}