Skip to main content

hwpforge_smithy_hwpx/decoder/
mod.rs

1//! HWPX decoding pipeline.
2//!
3//! Submodules handle individual stages:
4//! - `package` — ZIP extraction and file access
5//! - `header` — `header.xml` parsing → [`HwpxStyleStore`]
6//! - `section` — `section*.xml` parsing → paragraphs + page settings
7
8pub(crate) mod chart;
9pub(crate) mod header;
10pub(crate) mod package;
11pub(crate) mod section;
12pub(crate) mod shapes;
13
14use std::path::Path;
15
16use hwpforge_core::document::{Document, Draft};
17use hwpforge_core::image::ImageStore;
18use hwpforge_core::section::{MasterPage, Section};
19use hwpforge_core::PageSettings;
20use hwpforge_foundation::ApplyPageType;
21
22use crate::error::HwpxResult;
23use crate::style_store::HwpxStyleStore;
24
25// ── HwpxDocument ─────────────────────────────────────────────────
26
27/// The result of decoding an HWPX file.
28///
29/// Contains the Core document (structure), the HWPX-specific style
30/// store (fonts, char shapes, para shapes from `header.xml`), and
31/// binary image data extracted from `BinData/` entries.
32#[derive(Debug)]
33#[non_exhaustive]
34pub struct HwpxDocument {
35    /// The decoded document in Core's DOM.
36    pub document: Document<Draft>,
37    /// Style information parsed from `header.xml`.
38    pub style_store: HwpxStyleStore,
39    /// Binary image data extracted from `BinData/` ZIP entries.
40    pub image_store: ImageStore,
41}
42
43// ── HwpxDecoder ──────────────────────────────────────────────────
44
/// Entry point for turning HWPX files (ZIP + XML) into Core's
/// `Document<Draft>`.
///
/// The type carries no state; decoding is done through its associated
/// functions.
///
/// # Examples
///
/// ```no_run
/// use hwpforge_smithy_hwpx::HwpxDecoder;
///
/// let bytes = std::fs::read("document.hwpx").unwrap();
/// let result = HwpxDecoder::decode(&bytes).unwrap();
/// println!("Sections: {}", result.document.sections().len());
/// ```
pub struct HwpxDecoder;
57
58impl HwpxDecoder {
59    /// Decodes an HWPX file from raw bytes.
60    ///
61    /// Pipeline:
62    /// 1. Open ZIP archive, validate mimetype
63    /// 2. Parse `Contents/header.xml` → `HwpxStyleStore`
64    /// 3. Parse `Contents/section*.xml` → paragraphs + page settings
65    /// 4. Assemble `Document<Draft>` with sections
66    pub fn decode(bytes: &[u8]) -> HwpxResult<HwpxDocument> {
67        // Step 1: Open package
68        let mut pkg = package::PackageReader::new(bytes)?;
69
70        // Step 2: Parse header (style store + begin_num)
71        let header_xml = pkg.read_header_xml()?;
72        let header_result = header::parse_header(&header_xml)?;
73        let style_store = header_result.style_store;
74        let begin_num = header_result.begin_num;
75
76        // Step 3: Extract chart XMLs from ZIP
77        let chart_xmls = pkg.read_chart_xmls()?;
78
79        // Step 4: Extract masterpage XMLs from ZIP and parse them
80        let masterpage_xmls = pkg.read_masterpage_xmls()?;
81        let parsed_masterpages = parse_masterpages(masterpage_xmls);
82
83        // Step 5: Parse sections
84        let mut document = Document::<Draft>::new();
85        let section_count = pkg.section_count();
86        // Track how many masterpages have been assigned across sections
87        let mut masterpage_cursor = 0usize;
88
89        for i in 0..section_count {
90            let section_xml = pkg.read_section_xml(i)?;
91            let result = section::parse_section(&section_xml, i, &chart_xmls)?;
92
93            let page_settings = result.page_settings.unwrap_or_else(PageSettings::a4);
94
95            // Determine how many masterpages this section owns by scanning
96            // the section XML for masterPageCnt attribute (avoids modifying section.rs).
97            // Fall back to result.master_pages (parsed inline) if no ZIP files were found.
98            let mp_cnt = extract_master_page_cnt(&section_xml);
99            let section_master_pages: Option<Vec<MasterPage>> = if mp_cnt > 0 {
100                let end = (masterpage_cursor + mp_cnt).min(parsed_masterpages.len());
101                let slice = parsed_masterpages[masterpage_cursor..end].to_vec();
102                masterpage_cursor = end;
103                if slice.is_empty() {
104                    result.master_pages
105                } else {
106                    Some(slice)
107                }
108            } else {
109                result.master_pages
110            };
111
112            let section = Section {
113                paragraphs: result.paragraphs,
114                page_settings,
115                header: result.header,
116                footer: result.footer,
117                page_number: result.page_number,
118                column_settings: result.column_settings,
119                visibility: result.visibility,
120                line_number_shape: result.line_number_shape,
121                page_border_fills: result.page_border_fills,
122                master_pages: section_master_pages,
123                // Per-section startNum from secPr; merge footnote/endnote
124                // from header.xml for the first section.
125                begin_num: {
126                    let mut bn = result.begin_num;
127                    if i == 0 {
128                        if let (Some(ref mut section_bn), Some(ref header_bn)) =
129                            (&mut bn, &begin_num)
130                        {
131                            section_bn.footnote = header_bn.footnote;
132                            section_bn.endnote = header_bn.endnote;
133                        } else if bn.is_none() {
134                            bn = begin_num;
135                        }
136                    }
137                    bn
138                },
139                text_direction: result.text_direction,
140            };
141
142            document.add_section(section);
143        }
144
145        // Step 6: Extract binary image data from BinData/
146        let image_store = pkg.read_all_bindata()?;
147
148        Ok(HwpxDocument { document, style_store, image_store })
149    }
150
151    /// Decodes an HWPX file from a filesystem path.
152    pub fn decode_file(path: impl AsRef<Path>) -> HwpxResult<HwpxDocument> {
153        let bytes = std::fs::read(path.as_ref()).map_err(crate::error::HwpxError::Io)?;
154        Self::decode(&bytes)
155    }
156}
157
158// ── Masterpage helpers ────────────────────────────────────────────
159
160/// Parses all masterpage XML strings into [`MasterPage`] structs.
161///
162/// Input is a map from global masterpage index to raw XML.
163/// Returns a `Vec` sorted by index so masterpage 0 comes first.
164fn parse_masterpages(xmls: std::collections::HashMap<usize, String>) -> Vec<MasterPage> {
165    let mut entries: Vec<(usize, String)> = xmls.into_iter().collect();
166    entries.sort_by_key(|(idx, _)| *idx);
167    entries.into_iter().map(|(_, xml)| parse_masterpage_xml(&xml)).collect()
168}
169
170/// Parses a single masterpage XML string into a [`MasterPage`].
171///
172/// Extracts the `applyPageType` attribute from the root `<masterPage>` element
173/// and the paragraph text from `<hp:subList><hp:p><hp:run><hp:t>` descendants.
174/// Unknown `applyPageType` values fall back to `Both`.
175fn parse_masterpage_xml(xml: &str) -> MasterPage {
176    use hwpforge_core::paragraph::Paragraph;
177    use hwpforge_core::run::{Run, RunContent};
178    use hwpforge_foundation::{CharShapeIndex, ParaShapeIndex};
179
180    // Extract applyPageType attribute
181    let apply_page_type = extract_masterpage_apply_type(xml);
182
183    // Extract paragraphs: find all <hp:p> elements with their attributes.
184    // This is a lightweight scan — masterpage paragraphs typically contain
185    // minimal or no text content.
186    let mut paragraphs = Vec::new();
187    let mut search = xml;
188    while let Some(p_start) = search.find("<hp:p ").or_else(|| search.find("<hp:p>")) {
189        let after_p = &search[p_start..];
190        // Find the end of the opening <hp:p ...> tag
191        let Some(tag_end) = after_p.find('>') else { break };
192        let open_tag = &after_p[..tag_end];
193        let after_tag = &after_p[tag_end + 1..];
194        let Some(p_close) = after_tag.find("</hp:p>") else { break };
195        let p_content = &after_tag[..p_close];
196
197        // Extract paraPrIDRef from the <hp:p> tag
198        let para_pr_id = extract_attr_u32(open_tag, "paraPrIDRef");
199
200        // Collect all text runs within this paragraph
201        let mut runs = Vec::new();
202        let mut run_search = p_content;
203        while let Some(r_start) =
204            run_search.find("<hp:run ").or_else(|| run_search.find("<hp:run>"))
205        {
206            let after_r = &run_search[r_start..];
207            let Some(r_tag_end) = after_r.find('>') else { break };
208            let run_open = &after_r[..r_tag_end];
209            let char_pr_id = extract_attr_u32(run_open, "charPrIDRef");
210
211            // Find text within this run
212            let after_run_tag = &after_r[r_tag_end + 1..];
213            if let Some(t_start) = after_run_tag.find("<hp:t>") {
214                let after_t = &after_run_tag[t_start + "<hp:t>".len()..];
215                if let Some(t_end) = after_t.find("</hp:t>") {
216                    let text = &after_t[..t_end];
217                    if !text.is_empty() {
218                        runs.push(Run {
219                            content: RunContent::Text(text.to_string()),
220                            char_shape_id: CharShapeIndex::new(char_pr_id as usize),
221                        });
222                    }
223                }
224            }
225
226            // Advance past this run
227            let run_end_tag = "</hp:run>";
228            if let Some(re) = after_r.find(run_end_tag) {
229                run_search = &after_r[re + run_end_tag.len()..];
230            } else {
231                break;
232            }
233        }
234
235        let mut para = Paragraph::new(ParaShapeIndex::new(para_pr_id as usize));
236        for run in runs {
237            para.runs.push(run);
238        }
239        paragraphs.push(para);
240
241        // Advance past this </hp:p>
242        search = &after_tag[p_close + "</hp:p>".len()..];
243    }
244
245    MasterPage { apply_page_type, paragraphs }
246}
247
/// Extracts a named u32 attribute value from an XML open-tag string.
///
/// Looks for `attr_name="…"` (double-quoted) where the name starts either
/// at the beginning of `open_tag` or right after whitespace, so an
/// attribute whose name merely *ends* with `attr_name` (e.g. `uid` when
/// asking for `id`) is not mistaken for it.
///
/// Returns 0 if the attribute is not found, has no closing quote, or its
/// value cannot be parsed as `u32`.
fn extract_attr_u32(open_tag: &str, attr_name: &str) -> u32 {
    let needle = format!("{attr_name}=\"");
    let mut from = 0;
    while let Some(rel) = open_tag[from..].find(&needle) {
        let start = from + rel;
        let value_start = start + needle.len();
        // Accept the hit only when it is a whole attribute name.
        let standalone =
            open_tag[..start].chars().next_back().map_or(true, char::is_whitespace);
        if standalone {
            let value = &open_tag[value_start..];
            return value.find('"').and_then(|end| value[..end].parse().ok()).unwrap_or(0);
        }
        from = value_start;
    }
    0
}
261
262/// Extracts the `applyPageType` attribute value from a masterpage XML root element.
263fn extract_masterpage_apply_type(xml: &str) -> ApplyPageType {
264    // Look for type="BOTH"|"EVEN"|"ODD" in the <masterPage ...> opening tag.
265    // The encoder writes: <masterPage ... type="BOTH">
266    if let Some(pos) = xml.find("type=\"") {
267        let after = &xml[pos + "type=\"".len()..];
268        if let Some(end) = after.find('"') {
269            return match &after[..end] {
270                "BOTH" => ApplyPageType::Both,
271                "EVEN" => ApplyPageType::Even,
272                "ODD" => ApplyPageType::Odd,
273                _ => ApplyPageType::Both,
274            };
275        }
276    }
277    ApplyPageType::Both
278}
279
/// Reads the `masterPageCnt` attribute out of a section XML string.
///
/// This is a plain text scan, not an XML parse: the first occurrence of
/// `masterPageCnt="N"` anywhere in `section_xml` is used.
/// Returns 0 when the attribute is absent, unterminated, or not a valid
/// `usize`.
fn extract_master_page_cnt(section_xml: &str) -> usize {
    const KEY: &str = "masterPageCnt=\"";

    section_xml
        .split_once(KEY)
        .and_then(|(_, tail)| tail.split_once('"'))
        .and_then(|(digits, _)| digits.parse().ok())
        .unwrap_or(0)
}
294
#[cfg(test)]
mod tests {
    use super::*;
    use hwpforge_foundation::{HeadingType, NumberFormatType};
    use std::io::{Cursor, Write};
    use std::path::PathBuf;
    use zip::write::SimpleFileOptions;
    use zip::ZipWriter;

    /// Creates a complete minimal HWPX for testing.
    fn make_test_hwpx(header_xml: &str, section_xmls: &[&str]) -> Vec<u8> {
        make_test_hwpx_with_entries(header_xml, section_xmls, &[])
    }

    /// Creates a minimal HWPX and appends `extra_entries` (ZIP path, bytes)
    /// after the sections. Extra entries are written uncompressed.
    fn make_test_hwpx_with_entries(
        header_xml: &str,
        section_xmls: &[&str],
        extra_entries: &[(&str, &[u8])],
    ) -> Vec<u8> {
        let buf = Vec::new();
        let mut zip = ZipWriter::new(Cursor::new(buf));

        let stored =
            SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
        let deflate = SimpleFileOptions::default();

        zip.start_file("mimetype", stored).unwrap();
        zip.write_all(b"application/hwp+zip").unwrap();

        zip.start_file("Contents/header.xml", deflate).unwrap();
        zip.write_all(header_xml.as_bytes()).unwrap();

        for (i, xml) in section_xmls.iter().enumerate() {
            let path = format!("Contents/section{}.xml", i);
            zip.start_file(&path, deflate).unwrap();
            zip.write_all(xml.as_bytes()).unwrap();
        }

        for (path, data) in extra_entries.iter().copied() {
            zip.start_file(path, stored).unwrap();
            zip.write_all(data).unwrap();
        }

        zip.finish().unwrap().into_inner()
    }

    /// Resolves a fixture name relative to the workspace `tests/fixtures` directory.
    fn fixture_path(name: &str) -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../tests/fixtures").join(name)
    }

    /// Reads and decodes a fixture, panicking with the underlying error so a
    /// failing test shows *why* the fixture could not be loaded.
    fn decode_fixture(name: &str) -> HwpxDocument {
        let path = fixture_path(name);
        let bytes = std::fs::read(&path)
            .unwrap_or_else(|err| panic!("fixture should exist: {path:?}: {err}"));
        HwpxDecoder::decode(&bytes)
            .unwrap_or_else(|err| panic!("fixture should decode: {path:?}: {err:?}"))
    }

    /// Collects `(heading_type, heading_id_ref, heading_level)` of the para
    /// shape of every body paragraph in every section.
    fn collect_body_heading_triples(doc: &HwpxDocument) -> Vec<(HeadingType, u32, u32)> {
        doc.document
            .sections()
            .iter()
            .flat_map(|section| section.paragraphs.iter())
            .map(|paragraph| {
                let shape = doc
                    .style_store
                    .para_shape(paragraph.para_shape_id)
                    .expect("paragraph para shape should exist");
                (shape.heading_type, shape.heading_id_ref, shape.heading_level)
            })
            .collect()
    }

    const HEADER: &str = r##"<head version="1.4" secCnt="1">
        <refList>
            <fontfaces itemCnt="1">
                <fontface lang="HANGUL" fontCnt="1">
                    <font id="0" face="함초롬돋움" type="TTF" isEmbedded="0"/>
                </fontface>
            </fontfaces>
            <charProperties itemCnt="1">
                <charPr id="0" height="1000" textColor="#000000" shadeColor="none"
                        useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="0">
                    <fontRef hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>
                </charPr>
            </charProperties>
            <paraProperties itemCnt="1">
                <paraPr id="0">
                    <align horizontal="LEFT" vertical="BASELINE"/>
                    <switch><default>
                        <lineSpacing type="PERCENT" value="160"/>
                    </default></switch>
                </paraPr>
            </paraProperties>
        </refList>
    </head>"##;

    const SECTION_TEXT: &str = r#"<sec>
        <p paraPrIDRef="0">
            <run charPrIDRef="0">
                <secPr textDirection="HORIZONTAL">
                    <pagePr landscape="WIDELY" width="59528" height="84188">
                        <margin header="4252" footer="4252" gutter="0"
                                left="8504" right="8504" top="5668" bottom="4252"/>
                    </pagePr>
                </secPr>
                <t>안녕하세요</t>
            </run>
        </p>
    </sec>"#;

    // ── Full pipeline tests ──────────────────────────────────────

    #[test]
    fn decode_minimal_hwpx() {
        let bytes = make_test_hwpx(HEADER, &[SECTION_TEXT]);
        let result = HwpxDecoder::decode(&bytes).unwrap();

        // Document structure
        assert_eq!(result.document.sections().len(), 1);
        let section = &result.document.sections()[0];
        assert_eq!(section.paragraphs.len(), 1);

        // Text content
        let text = section.paragraphs[0].runs[0].content.as_text();
        assert_eq!(text, Some("안녕하세요"));

        // Page settings
        assert_eq!(section.page_settings.width.as_i32(), 59528);
        assert_eq!(section.page_settings.height.as_i32(), 84188);

        // Style store
        assert_eq!(result.style_store.font_count(), 1);
        assert_eq!(result.style_store.char_shape_count(), 1);
        assert_eq!(result.style_store.para_shape_count(), 1);
    }

    #[test]
    fn decode_multiple_sections() {
        let s0 = r#"<sec><p paraPrIDRef="0"><run charPrIDRef="0"><t>Section 0</t></run></p></sec>"#;
        let s1 = r#"<sec><p paraPrIDRef="0"><run charPrIDRef="0"><t>Section 1</t></run></p></sec>"#;
        let bytes = make_test_hwpx(HEADER, &[s0, s1]);
        let result = HwpxDecoder::decode(&bytes).unwrap();
        assert_eq!(result.document.sections().len(), 2);
    }

    #[test]
    fn decode_with_table() {
        let section = r#"<sec>
            <p paraPrIDRef="0">
                <run charPrIDRef="0">
                    <tbl rowCnt="1" colCnt="1">
                        <tr>
                            <tc name="A1">
                                <cellSz width="5000" height="1000"/>
                                <subList><p paraPrIDRef="0"><run charPrIDRef="0"><t>Cell</t></run></p></subList>
                            </tc>
                        </tr>
                    </tbl>
                </run>
            </p>
        </sec>"#;
        let bytes = make_test_hwpx(HEADER, &[section]);
        let result = HwpxDecoder::decode(&bytes).unwrap();
        let run = &result.document.sections()[0].paragraphs[0].runs[0];
        assert!(run.content.is_table());
    }

    #[test]
    fn decode_section_without_secpr_uses_a4_defaults() {
        let section = r#"<sec><p paraPrIDRef="0"><run charPrIDRef="0"><t>Text</t></run></p></sec>"#;
        let bytes = make_test_hwpx(HEADER, &[section]);
        let result = HwpxDecoder::decode(&bytes).unwrap();
        let ps = &result.document.sections()[0].page_settings;
        assert_eq!(*ps, PageSettings::a4());
    }

    #[test]
    fn decode_not_a_zip() {
        let err = HwpxDecoder::decode(b"not a zip").unwrap_err();
        assert!(matches!(err, crate::error::HwpxError::Zip(_)));
    }

    #[test]
    fn decode_file_nonexistent() {
        let err = HwpxDecoder::decode_file("/nonexistent/path.hwpx").unwrap_err();
        assert!(matches!(err, crate::error::HwpxError::Io(_)));
    }

    // ── Header / Footer / PageNum decode tests ──────────────────

    #[test]
    fn decode_section_with_header_ctrl() {
        let section = r#"<sec>
            <p paraPrIDRef="0">
                <run charPrIDRef="0">
                    <ctrl>
                        <header id="0" applyPageType="BOTH">
                            <subList id="0" textDirection="HORIZONTAL" lineWrap="BREAK" vertAlign="TOP"
                                     linkListIDRef="0" linkListNextIDRef="0" textWidth="0" textHeight="0">
                                <p paraPrIDRef="0">
                                    <run charPrIDRef="0"><t>Page Header</t></run>
                                </p>
                            </subList>
                        </header>
                    </ctrl>
                    <t>Body text</t>
                </run>
            </p>
        </sec>"#;
        let bytes = make_test_hwpx(HEADER, &[section]);
        let result = HwpxDecoder::decode(&bytes).unwrap();

        let sec = &result.document.sections()[0];
        let header = sec.header.as_ref().expect("section should have header");
        assert_eq!(header.apply_page_type, hwpforge_foundation::ApplyPageType::Both);
        assert_eq!(header.paragraphs.len(), 1);
        assert_eq!(header.paragraphs[0].runs[0].content.as_text(), Some("Page Header"));
    }

    #[test]
    fn decode_section_with_footer_and_pagenum() {
        let section = r#"<sec>
            <p paraPrIDRef="0">
                <run charPrIDRef="0">
                    <ctrl>
                        <footer id="0" applyPageType="ODD">
                            <subList id="0" textDirection="HORIZONTAL" lineWrap="BREAK" vertAlign="TOP"
                                     linkListIDRef="0" linkListNextIDRef="0" textWidth="0" textHeight="0">
                                <p paraPrIDRef="0">
                                    <run charPrIDRef="0"><t>Footer</t></run>
                                </p>
                            </subList>
                        </footer>
                    </ctrl>
                    <ctrl>
                        <pageNum pos="BOTTOM_CENTER" formatType="DIGIT" sideChar="- "/>
                    </ctrl>
                    <t>Body</t>
                </run>
            </p>
        </sec>"#;
        let bytes = make_test_hwpx(HEADER, &[section]);
        let result = HwpxDecoder::decode(&bytes).unwrap();

        let sec = &result.document.sections()[0];
        let footer = sec.footer.as_ref().expect("section should have footer");
        assert_eq!(footer.apply_page_type, hwpforge_foundation::ApplyPageType::Odd);
        assert_eq!(footer.paragraphs[0].runs[0].content.as_text(), Some("Footer"));

        let pn = sec.page_number.as_ref().expect("section should have page number");
        assert_eq!(pn.position, hwpforge_foundation::PageNumberPosition::BottomCenter);
        assert_eq!(pn.number_format, NumberFormatType::Digit);
        assert_eq!(pn.decoration, "- ");
    }

    // ── Image binary roundtrip test ─────────────────────────────

    #[test]
    fn decode_extracts_bindata_images() {
        let section = r#"<sec><p paraPrIDRef="0"><run charPrIDRef="0"><t>Body</t></run></p></sec>"#;

        // Add a BinData image
        let fake_png: Vec<u8> = vec![0x89, 0x50, 0x4E, 0x47]; // PNG magic bytes
        let bytes = make_test_hwpx_with_entries(
            HEADER,
            &[section],
            &[("BinData/logo.png", fake_png.as_slice())],
        );
        let result = HwpxDecoder::decode(&bytes).unwrap();

        assert!(!result.image_store.is_empty(), "image store should contain extracted images");
        let data = result.image_store.get("logo.png").expect("should find logo.png");
        assert_eq!(data, &fake_png);
    }

    #[test]
    fn decode_user_sample_bullet_list_preserves_bullet_semantics() {
        let decoded = decode_fixture("user_samples/lists/sample-bullet-list.hwpx");
        let headings = collect_body_heading_triples(&decoded);

        assert!(headings.contains(&(HeadingType::Bullet, 1, 0)));
        assert_eq!(decoded.style_store.bullet_count(), 1);
        assert_eq!(decoded.style_store.numbering_count(), 1);
        assert_eq!(decoded.style_store.iter_bullets().next().map(|bullet| bullet.id), Some(1));
    }

    #[test]
    fn decode_user_sample_numbered_list_preserves_numbering_semantics() {
        let decoded = decode_fixture("user_samples/lists/sample-numbered-list.hwpx");
        let headings = collect_body_heading_triples(&decoded);

        assert!(headings.contains(&(HeadingType::Number, 2, 0)));
        assert!(decoded.style_store.numbering_count() >= 2);
    }

    #[test]
    fn decode_user_sample_mixed_lists_with_outline_preserves_all_list_kinds() {
        let decoded = decode_fixture("user_samples/lists/sample-mixed-lists-with-outline.hwpx");
        let headings = collect_body_heading_triples(&decoded);

        assert!(headings.contains(&(HeadingType::Outline, 0, 0)));
        assert!(headings.contains(&(HeadingType::Outline, 0, 1)));
        assert!(headings.contains(&(HeadingType::Outline, 0, 2)));
        assert!(headings.contains(&(HeadingType::Bullet, 1, 0)));
        assert!(headings.contains(&(HeadingType::Number, 2, 0)));
        assert!(headings.contains(&(HeadingType::Number, 3, 0)));
        assert_eq!(decoded.style_store.bullet_count(), 1);
        assert!(decoded.style_store.numbering_count() >= 3);
    }

    #[test]
    fn decode_user_sample_numbered_custom_formats_preserves_distinct_numbering_ids() {
        let decoded = decode_fixture("user_samples/lists/sample-numbered-list-custom-formats.hwpx");
        let headings = collect_body_heading_triples(&decoded);

        for id_ref in [2, 3, 4, 5] {
            assert!(headings.contains(&(HeadingType::Number, id_ref, 0)));
        }
        assert!(decoded.style_store.numbering_count() >= 5);
        let numberings: Vec<_> = decoded.style_store.iter_numberings().collect();
        assert_eq!(numberings[1].levels[0].text, "^1)");
        assert_eq!(numberings[2].levels[0].text, "(^1)");
        assert_eq!(numberings[4].levels[6].num_format, NumberFormatType::CircledLatinSmall);
    }

    #[test]
    fn decode_user_sample_checkable_bullet_basic_preserves_checked_glyph_and_item_state() {
        let decoded = decode_fixture("user_samples/lists/sample-checkable-bullet-basic.hwpx");
        let paragraphs = &decoded.document.sections()[0].paragraphs;

        let unchecked = paragraphs
            .iter()
            .find(|paragraph| paragraph.text_content().contains("unchecked item A"))
            .expect("fixture should contain unchecked item A");
        let checked = paragraphs
            .iter()
            .find(|paragraph| paragraph.text_content().contains("checked item B"))
            .expect("fixture should contain checked item B");

        let unchecked_shape = decoded.style_store.para_shape(unchecked.para_shape_id).unwrap();
        let checked_shape = decoded.style_store.para_shape(checked.para_shape_id).unwrap();
        let bullet = decoded
            .style_store
            .iter_bullets()
            .find(|bullet| bullet.id == unchecked_shape.heading_id_ref)
            .expect("checkable bullet definition should exist");

        assert_eq!(unchecked_shape.heading_type, HeadingType::Bullet);
        assert_eq!(checked_shape.heading_type, HeadingType::Bullet);
        assert!(bullet.is_checkable());
        assert_eq!(bullet.checked_char.as_deref(), Some("☑"));
        assert!(!unchecked_shape.checked);
        assert!(checked_shape.checked);
    }

    #[test]
    fn decode_user_sample_checkable_bullet_nested_preserves_depth() {
        let decoded = decode_fixture("user_samples/lists/sample-checkable-bullet-nested.hwpx");
        let paragraphs = &decoded.document.sections()[0].paragraphs;

        let level1 = paragraphs
            .iter()
            .find(|paragraph| paragraph.text_content().contains("level 1 unchecked"))
            .expect("fixture should contain level 1 item");
        let level2 = paragraphs
            .iter()
            .find(|paragraph| paragraph.text_content().contains("level 2 checked"))
            .expect("fixture should contain level 2 item");
        let level3 = paragraphs
            .iter()
            .find(|paragraph| paragraph.text_content().contains("level 3 unchecked"))
            .expect("fixture should contain level 3 item");

        assert_eq!(decoded.style_store.para_shape(level1.para_shape_id).unwrap().heading_level, 0);
        assert_eq!(decoded.style_store.para_shape(level2.para_shape_id).unwrap().heading_level, 1);
        assert_eq!(decoded.style_store.para_shape(level3.para_shape_id).unwrap().heading_level, 2);
    }
}