Skip to main content

jw_hwp_core/
model.rs

1use crate::doc_info::DocumentProperties;
2use crate::error::Warning;
3use crate::structure::StructureNode;
4use crate::summary::Metadata;
5
6#[derive(Debug, Clone, serde::Serialize, PartialEq)]
7pub struct HwpDocument {
8    pub version: String,
9    pub metadata: Metadata,
10    pub properties: DocumentProperties,
11    pub shapes: crate::shape::ShapeTables,
12    pub sections: Vec<Section>,
13    #[serde(default)]
14    pub assets: crate::assets::AssetCatalog,
15    pub warnings: Vec<Warning>,
16}
17
18#[derive(Debug, Clone, serde::Serialize, PartialEq, Eq)]
19pub struct ParagraphDetail {
20    pub text: String,
21    pub para_shape_id: u32,
22    /// Sorted list of `(char_index_in_text, char_shape_id)` boundaries.
23    pub runs: Vec<(u32, u32)>,
24    #[serde(default, skip_serializing_if = "Vec::is_empty")]
25    pub footnotes: Vec<FootnoteBody>,
26    #[serde(default, skip_serializing_if = "Option::is_none")]
27    pub equation: Option<String>,
28    #[serde(default, skip_serializing_if = "Vec::is_empty")]
29    pub image_refs: Vec<ImageRef>,
30}
31
32#[derive(Debug, Clone, serde::Serialize, PartialEq, Eq)]
33pub struct FootnoteBody {
34    /// `"footnote"` | `"endnote"`.
35    pub kind: String,
36    pub text: String,
37}
38
39#[derive(Debug, Clone, serde::Serialize, PartialEq, Eq)]
40pub struct ImageRef {
41    pub bin_id: u16,
42}
43
44#[derive(Debug, Clone, serde::Serialize, PartialEq, Eq)]
45pub struct Section {
46    pub index: usize,
47    pub paragraphs: Vec<String>,
48    pub paragraph_details: Vec<ParagraphDetail>,
49    pub structure: Vec<StructureNode>,
50    pub tables: Vec<crate::table::Table>,
51}
52
53impl HwpDocument {
54    pub fn full_text(&self) -> String {
55        self.sections
56            .iter()
57            .flat_map(|s| s.paragraphs.iter())
58            .cloned()
59            .collect::<Vec<_>>()
60            .join("\n")
61    }
62}
63
#[cfg(test)]
mod tests {
    use super::*;
    use crate::structure::NodeKind;

    /// Paragraphs from consecutive sections are concatenated in order with a
    /// single `\n` between them and no trailing newline.
    #[test]
    fn full_text_joins_paragraphs_with_newlines() {
        let d = HwpDocument {
            version: "5.0.3.0".into(),
            metadata: Metadata::default(),
            properties: DocumentProperties::default(),
            shapes: Default::default(),
            sections: vec![
                Section {
                    index: 0,
                    paragraphs: vec!["a".into(), "b".into()],
                    paragraph_details: vec![],
                    structure: vec![],
                    tables: vec![],
                },
                Section {
                    index: 1,
                    paragraphs: vec!["c".into()],
                    paragraph_details: vec![],
                    structure: vec![],
                    tables: vec![],
                },
            ],
            assets: Default::default(),
            warnings: vec![],
        };
        assert_eq!(d.full_text(), "a\nb\nc");
    }

    /// Builds a `StructureNode` with an explicit `Paragraph` kind and checks
    /// that the field reads back unchanged.
    // NOTE(review): despite the name, no `Default` impl is exercised here;
    // the kind is set explicitly on construction.
    #[test]
    fn structure_node_defaults_to_paragraph() {
        let node = StructureNode {
            id: "0:0".into(),
            kind: NodeKind::Paragraph,
            preview: "hello".into(),
            ctrl_id: None,
        };
        assert_eq!(node.kind, NodeKind::Paragraph);
    }
}