Skip to main content

rdocx/
document.rs

1//! The main Document type — entry point for the rdocx API.
2
3use std::path::Path;
4
5use rdocx_opc::OpcPackage;
6use rdocx_opc::relationship::rel_types;
7use rdocx_oxml::document::{BodyContent, CT_Columns, CT_Document, CT_SectPr};
8use rdocx_oxml::drawing::{CT_Anchor, CT_Drawing, CT_Inline};
9use rdocx_oxml::header_footer::{CT_HdrFtr, HdrFtrRef, HdrFtrType};
10use rdocx_oxml::numbering::CT_Numbering;
11use rdocx_oxml::properties::{CT_PPr, CT_RPr};
12use rdocx_oxml::shared::{ST_PageOrientation, ST_SectionType};
13use rdocx_oxml::styles::CT_Styles;
14use rdocx_oxml::table::CT_Tbl;
15use rdocx_oxml::text::{CT_P, CT_R, RunContent};
16
17use rdocx_oxml::core_properties::CoreProperties;
18
19use crate::Length;
20use crate::error::{Error, Result};
21use crate::paragraph::{Paragraph, ParagraphRef};
22use crate::style::{self, Style, StyleBuilder};
23use crate::table::{Table, TableRef};
24
/// A Word document (.docx file).
///
/// This is the main entry point for reading, creating, and modifying
/// DOCX documents.
///
/// The parsed parts (`document`, `styles`, `numbering`, `core_properties`)
/// are kept in memory and serialized back into `package` when the document
/// is saved.
pub struct Document {
    /// Underlying OPC package holding the parts and relationships.
    package: OpcPackage,
    /// Parsed main document part.
    document: CT_Document,
    /// Parsed style definitions (defaults when none could be loaded).
    styles: CT_Styles,
    /// Parsed numbering definitions, when present.
    numbering: Option<CT_Numbering>,
    /// Core metadata (`/docProps/core.xml`), when present.
    core_properties: Option<CoreProperties>,
    /// Part name for the main document
    doc_part_name: String,
    /// Running counter used to number embedded image parts
    /// (avoids rescanning parts on each embed).
    image_counter: usize,
}
40
41impl Document {
42    /// Create a new, empty document with default page setup and styles.
43    pub fn new() -> Self {
44        let mut package = OpcPackage::new_docx();
45        let document = CT_Document::new();
46        let styles = CT_Styles::new_default();
47
48        // Set up styles relationship
49        package
50            .get_or_create_part_rels("/word/document.xml")
51            .add(rel_types::STYLES, "styles.xml");
52
53        Document {
54            package,
55            document,
56            styles,
57            numbering: None,
58            core_properties: None,
59            doc_part_name: "/word/document.xml".to_string(),
60            image_counter: 0,
61        }
62    }
63
64    /// Open a document from a file path.
65    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
66        let package = OpcPackage::open(path)?;
67        Self::from_package(package)
68    }
69
70    /// Open a document from bytes.
71    pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
72        let cursor = std::io::Cursor::new(bytes);
73        let package = OpcPackage::from_reader(cursor)?;
74        Self::from_package(package)
75    }
76
77    fn from_package(package: OpcPackage) -> Result<Self> {
78        let doc_part_name = package.main_document_part().ok_or(Error::NoDocumentPart)?;
79
80        let doc_xml = package
81            .get_part(&doc_part_name)
82            .ok_or(Error::NoDocumentPart)?;
83        let document = CT_Document::from_xml(doc_xml)?;
84
85        // Try to load styles
86        let styles = if let Some(rels) = package.get_part_rels(&doc_part_name) {
87            if let Some(styles_rel) = rels.get_by_type(rel_types::STYLES) {
88                let styles_part =
89                    OpcPackage::resolve_rel_target(&doc_part_name, &styles_rel.target);
90                if let Some(styles_xml) = package.get_part(&styles_part) {
91                    CT_Styles::from_xml(styles_xml)?
92                } else {
93                    CT_Styles::new_default()
94                }
95            } else {
96                CT_Styles::new_default()
97            }
98        } else {
99            CT_Styles::new_default()
100        };
101
102        // Try to load numbering definitions
103        let numbering = if let Some(rels) = package.get_part_rels(&doc_part_name) {
104            if let Some(num_rel) = rels.get_by_type(rel_types::NUMBERING) {
105                let num_part = OpcPackage::resolve_rel_target(&doc_part_name, &num_rel.target);
106                if let Some(num_xml) = package.get_part(&num_part) {
107                    Some(CT_Numbering::from_xml(num_xml)?)
108                } else {
109                    None
110                }
111            } else {
112                None
113            }
114        } else {
115            None
116        };
117
118        // Try to load core properties from docProps/core.xml
119        let core_properties = package
120            .get_part("/docProps/core.xml")
121            .and_then(|xml| CoreProperties::from_xml(xml).ok());
122
123        let image_counter = package
124            .parts
125            .keys()
126            .filter(|k| k.starts_with("/word/media/image"))
127            .count();
128
129        Ok(Document {
130            package,
131            document,
132            styles,
133            numbering,
134            core_properties,
135            doc_part_name,
136            image_counter,
137        })
138    }
139
140    /// Save the document to a file path.
141    pub fn save<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
142        self.flush_to_package()?;
143        self.package.save(path)?;
144        Ok(())
145    }
146
147    /// Save the document to a byte vector.
148    pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
149        self.flush_to_package()?;
150        let mut buf = std::io::Cursor::new(Vec::new());
151        self.package.write_to(&mut buf)?;
152        Ok(buf.into_inner())
153    }
154
155    /// Write the in-memory document/styles back into the OPC package parts.
156    fn flush_to_package(&mut self) -> Result<()> {
157        // Serialize document.xml
158        let doc_xml = self.document.to_xml()?;
159        self.package.set_part(&self.doc_part_name, doc_xml);
160
161        // Serialize styles.xml
162        let styles_xml = self.styles.to_xml()?;
163        self.package.set_part("/word/styles.xml", styles_xml);
164
165        // Serialize numbering.xml if we have numbering definitions
166        if let Some(ref numbering) = self.numbering {
167            let numbering_xml = numbering.to_xml()?;
168            self.package.set_part("/word/numbering.xml", numbering_xml);
169        }
170
171        // Serialize docProps/core.xml if we have metadata
172        if let Some(ref props) = self.core_properties {
173            let core_xml = props.to_xml()?;
174            self.package.set_part("/docProps/core.xml", core_xml);
175            self.package.content_types.add_override(
176                "/docProps/core.xml",
177                "application/vnd.openxmlformats-package.core-properties+xml",
178            );
179        }
180
181        Ok(())
182    }
183
184    // ---- Paragraph access ----
185
186    /// Get immutable references to all paragraphs.
187    pub fn paragraphs(&self) -> Vec<ParagraphRef<'_>> {
188        self.document
189            .body
190            .paragraphs()
191            .map(|p| ParagraphRef { inner: p })
192            .collect()
193    }
194
195    /// Add a paragraph with the given text and return a mutable reference.
196    pub fn add_paragraph(&mut self, text: &str) -> Paragraph<'_> {
197        let mut p = CT_P::new();
198        if !text.is_empty() {
199            p.add_run(text);
200        }
201        self.document.body.content.push(BodyContent::Paragraph(p));
202        match self.document.body.content.last_mut().unwrap() {
203            BodyContent::Paragraph(p) => Paragraph { inner: p },
204            _ => unreachable!(),
205        }
206    }
207
208    /// Get the number of paragraphs.
209    pub fn paragraph_count(&self) -> usize {
210        self.document.body.paragraphs().count()
211    }
212
213    /// Get a mutable reference to a paragraph by index (among paragraphs only).
214    pub fn paragraph_mut(&mut self, index: usize) -> Option<Paragraph<'_>> {
215        self.document
216            .body
217            .paragraphs_mut()
218            .nth(index)
219            .map(|p| Paragraph { inner: p })
220    }
221
222    // ---- Table access ----
223
224    /// Get immutable references to all tables.
225    pub fn tables(&self) -> Vec<TableRef<'_>> {
226        self.document
227            .body
228            .tables()
229            .map(|t| TableRef { inner: t })
230            .collect()
231    }
232
233    /// Add a table with the specified number of rows and columns.
234    /// Returns a mutable reference for further configuration.
235    pub fn add_table(&mut self, rows: usize, cols: usize) -> Table<'_> {
236        use rdocx_oxml::table::{CT_Row, CT_TblGrid, CT_TblGridCol, CT_TblPr, CT_TblWidth, CT_Tc};
237        use rdocx_oxml::units::Twips;
238
239        // Default column width: divide 9360tw (6.5" printable at 1" margins) evenly
240        let col_width = Twips(9360 / cols as i32);
241
242        let grid = CT_TblGrid {
243            columns: (0..cols)
244                .map(|_| CT_TblGridCol { width: col_width })
245                .collect(),
246        };
247
248        let mut tbl = CT_Tbl::new();
249        tbl.properties = Some(CT_TblPr {
250            width: Some(CT_TblWidth::dxa(col_width.0 * cols as i32)),
251            ..Default::default()
252        });
253        tbl.grid = Some(grid);
254
255        for _ in 0..rows {
256            let mut row = CT_Row::new();
257            for _ in 0..cols {
258                row.cells.push(CT_Tc::new());
259            }
260            tbl.rows.push(row);
261        }
262
263        self.document.body.content.push(BodyContent::Table(tbl));
264        match self.document.body.content.last_mut().unwrap() {
265            BodyContent::Table(t) => Table { inner: t },
266            _ => unreachable!(),
267        }
268    }
269
270    /// Get the number of tables.
271    pub fn table_count(&self) -> usize {
272        self.document.body.tables().count()
273    }
274
275    // ---- Content insertion ----
276
277    /// Get the number of body content elements (paragraphs + tables).
278    pub fn content_count(&self) -> usize {
279        self.document.body.content_count()
280    }
281
282    /// Insert a paragraph at the given body index.
283    ///
284    /// Returns a mutable `Paragraph` for further configuration.
285    /// Panics if `index > content_count()`.
286    pub fn insert_paragraph(&mut self, index: usize, text: &str) -> Paragraph<'_> {
287        let mut p = CT_P::new();
288        if !text.is_empty() {
289            p.add_run(text);
290        }
291        self.document.body.insert_paragraph(index, p);
292        match &mut self.document.body.content[index] {
293            BodyContent::Paragraph(p) => Paragraph { inner: p },
294            _ => unreachable!(),
295        }
296    }
297
298    /// Insert a table at the given body index.
299    ///
300    /// Returns a mutable `Table` for further configuration.
301    /// Panics if `index > content_count()`.
302    pub fn insert_table(&mut self, index: usize, rows: usize, cols: usize) -> Table<'_> {
303        use rdocx_oxml::table::{CT_Row, CT_TblGrid, CT_TblGridCol, CT_TblPr, CT_TblWidth, CT_Tc};
304        use rdocx_oxml::units::Twips;
305
306        let col_width = Twips(9360 / cols as i32);
307        let grid = CT_TblGrid {
308            columns: (0..cols)
309                .map(|_| CT_TblGridCol { width: col_width })
310                .collect(),
311        };
312
313        let mut tbl = CT_Tbl::new();
314        tbl.properties = Some(CT_TblPr {
315            width: Some(CT_TblWidth::dxa(col_width.0 * cols as i32)),
316            ..Default::default()
317        });
318        tbl.grid = Some(grid);
319
320        for _ in 0..rows {
321            let mut row = CT_Row::new();
322            for _ in 0..cols {
323                row.cells.push(CT_Tc::new());
324            }
325            tbl.rows.push(row);
326        }
327
328        self.document.body.insert_table(index, tbl);
329        match &mut self.document.body.content[index] {
330            BodyContent::Table(t) => Table { inner: t },
331            _ => unreachable!(),
332        }
333    }
334
335    /// Find the body content index of the first paragraph containing the given text.
336    pub fn find_content_index(&self, text: &str) -> Option<usize> {
337        self.document.body.find_paragraph_index(text)
338    }
339
340    /// Remove the content at the given body index.
341    ///
342    /// Returns `true` if an element was removed, `false` if the index was out of bounds.
343    pub fn remove_content(&mut self, index: usize) -> bool {
344        self.document.body.remove(index).is_some()
345    }
346
347    // ---- Image support ----
348
349    /// Add an inline image to the document.
350    ///
351    /// Embeds the image data (PNG, JPEG, etc.) into the package and adds a
352    /// paragraph containing the image. Returns a mutable reference to the
353    /// paragraph for further configuration.
354    ///
355    /// `width` and `height` specify the display size.
356    pub fn add_picture(
357        &mut self,
358        image_data: &[u8],
359        image_filename: &str,
360        width: Length,
361        height: Length,
362    ) -> Paragraph<'_> {
363        let rel_id = self.embed_image(image_data, image_filename);
364
365        let inline = CT_Inline::new(&rel_id, width.to_emu(), height.to_emu());
366
367        let drawing = CT_Drawing::inline(inline);
368        let run = CT_R {
369            properties: None,
370            content: vec![RunContent::Drawing(drawing)],
371            extra_xml: Vec::new(),
372        };
373
374        let mut p = CT_P::new();
375        p.runs.push(run);
376        self.document.body.content.push(BodyContent::Paragraph(p));
377        match self.document.body.content.last_mut().unwrap() {
378            BodyContent::Paragraph(p) => Paragraph { inner: p },
379            _ => unreachable!(),
380        }
381    }
382
383    /// Add a full-page background image behind text.
384    ///
385    /// The image is placed at position (0,0) relative to the page with
386    /// dimensions matching the page size from section properties.
387    /// It is inserted at the beginning of the document body so it renders
388    /// behind all other content.
389    pub fn add_background_image(
390        &mut self,
391        image_data: &[u8],
392        image_filename: &str,
393    ) -> Paragraph<'_> {
394        let rel_id = self.embed_image(image_data, image_filename);
395
396        // Get page dimensions from section properties (default US Letter)
397        let sect = self
398            .document
399            .body
400            .sect_pr
401            .as_ref()
402            .cloned()
403            .unwrap_or_else(CT_SectPr::default_letter);
404        let page_width_emu = sect
405            .page_width
406            .unwrap_or(rdocx_oxml::units::Twips(12240))
407            .to_emu()
408            .0;
409        let page_height_emu = sect
410            .page_height
411            .unwrap_or(rdocx_oxml::units::Twips(15840))
412            .to_emu()
413            .0;
414
415        let anchor = CT_Anchor::background(&rel_id, page_width_emu, page_height_emu);
416        let drawing = CT_Drawing::anchor(anchor);
417        let run = CT_R {
418            properties: None,
419            content: vec![RunContent::Drawing(drawing)],
420            extra_xml: Vec::new(),
421        };
422
423        let mut p = CT_P::new();
424        p.runs.push(run);
425        self.document.body.insert_paragraph(0, p);
426        match &mut self.document.body.content[0] {
427            BodyContent::Paragraph(p) => Paragraph { inner: p },
428            _ => unreachable!(),
429        }
430    }
431
432    /// Add an anchored (floating) image to the document.
433    ///
434    /// If `behind_text` is true, the image renders behind text content.
435    /// The image is inserted at the beginning of the document body.
436    pub fn add_anchored_image(
437        &mut self,
438        image_data: &[u8],
439        image_filename: &str,
440        width: Length,
441        height: Length,
442        behind_text: bool,
443    ) -> Paragraph<'_> {
444        let rel_id = self.embed_image(image_data, image_filename);
445
446        let mut anchor = CT_Anchor::background(&rel_id, width.to_emu(), height.to_emu());
447        anchor.behind_doc = behind_text;
448
449        let drawing = CT_Drawing::anchor(anchor);
450        let run = CT_R {
451            properties: None,
452            content: vec![RunContent::Drawing(drawing)],
453            extra_xml: Vec::new(),
454        };
455
456        let mut p = CT_P::new();
457        p.runs.push(run);
458        self.document.body.insert_paragraph(0, p);
459        match &mut self.document.body.content[0] {
460            BodyContent::Paragraph(p) => Paragraph { inner: p },
461            _ => unreachable!(),
462        }
463    }
464
465    /// Return the next unique image number and bump the counter.
466    fn next_image_number(&mut self) -> usize {
467        self.image_counter += 1;
468        self.image_counter
469    }
470
471    /// Embed an image into the OPC package and return the relationship ID.
472    fn embed_image(&mut self, image_data: &[u8], filename: &str) -> String {
473        use rdocx_opc::relationship::rel_types;
474
475        // Determine content type from extension
476        let ext = filename.rsplit('.').next().unwrap_or("png").to_lowercase();
477        let content_type = match ext.as_str() {
478            "png" => "image/png",
479            "jpg" | "jpeg" => "image/jpeg",
480            "gif" => "image/gif",
481            "bmp" => "image/bmp",
482            "tiff" | "tif" => "image/tiff",
483            "svg" => "image/svg+xml",
484            _ => "image/png",
485        };
486
487        // Generate a unique part name using cached counter
488        let image_num = self.next_image_number();
489        let part_name = format!("/word/media/image{image_num}.{ext}");
490
491        // Store the image data
492        self.package.set_part(&part_name, image_data.to_vec());
493
494        // Add content type override
495        self.package.content_types.add_default(&ext, content_type);
496
497        // Add relationship
498        let rel_target = format!("media/image{image_num}.{ext}");
499        let rels = self.package.get_or_create_part_rels(&self.doc_part_name);
500        rels.add(rel_types::IMAGE, &rel_target)
501    }
502
503    // ---- Header/Footer ----
504
505    /// Set the default header text.
506    ///
507    /// Creates a header part with the given text and references it from
508    /// the section properties.
509    pub fn set_header(&mut self, text: &str) {
510        self.set_header_footer_part(text, true, HdrFtrType::Default);
511    }
512
513    /// Set the default footer text.
514    pub fn set_footer(&mut self, text: &str) {
515        self.set_header_footer_part(text, false, HdrFtrType::Default);
516    }
517
518    /// Set the first-page header text.
519    pub fn set_first_page_header(&mut self, text: &str) {
520        self.set_different_first_page(true);
521        self.set_header_footer_part(text, true, HdrFtrType::First);
522    }
523
524    /// Set the first-page footer text.
525    pub fn set_first_page_footer(&mut self, text: &str) {
526        self.set_different_first_page(true);
527        self.set_header_footer_part(text, false, HdrFtrType::First);
528    }
529
530    fn set_header_footer_part(&mut self, text: &str, is_header: bool, hdr_type: HdrFtrType) {
531        use rdocx_opc::relationship::rel_types;
532
533        let mut hdr_ftr = CT_HdrFtr::new();
534        let mut p = CT_P::new();
535        if !text.is_empty() {
536            p.add_run(text);
537        }
538        hdr_ftr.paragraphs.push(p);
539
540        // Determine part name based on type
541        let type_suffix = match hdr_type {
542            HdrFtrType::Default => "",
543            HdrFtrType::First => "First",
544            HdrFtrType::Even => "Even",
545        };
546        let (part_name, rel_type, content_type) = if is_header {
547            (
548                format!("/word/header{type_suffix}1.xml"),
549                rel_types::HEADER,
550                "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
551            )
552        } else {
553            (
554                format!("/word/footer{type_suffix}1.xml"),
555                rel_types::FOOTER,
556                "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
557            )
558        };
559
560        // Serialize the header/footer
561        let xml = if is_header {
562            hdr_ftr
563                .to_xml_header()
564                .expect("header serialization failed")
565        } else {
566            hdr_ftr
567                .to_xml_footer()
568                .expect("footer serialization failed")
569        };
570
571        self.package.set_part(&part_name, xml);
572        self.package
573            .content_types
574            .add_override(&part_name, content_type);
575
576        // Add relationship
577        let rel_target = part_name.trim_start_matches("/word/");
578        let rels = self.package.get_or_create_part_rels(&self.doc_part_name);
579        let rel_id = rels.add(rel_type, rel_target);
580
581        // Add reference in section properties
582        let sect = self.section_properties_mut();
583        let refs = if is_header {
584            &mut sect.header_refs
585        } else {
586            &mut sect.footer_refs
587        };
588
589        // Remove existing ref of same type
590        refs.retain(|r| r.hdr_ftr_type != hdr_type);
591        refs.push(HdrFtrRef {
592            hdr_ftr_type: hdr_type,
593            rel_id,
594        });
595    }
596
597    /// Get the default header text, if set.
598    pub fn header_text(&self) -> Option<String> {
599        self.get_header_footer_text(true, HdrFtrType::Default)
600    }
601
602    /// Get the default footer text, if set.
603    pub fn footer_text(&self) -> Option<String> {
604        self.get_header_footer_text(false, HdrFtrType::Default)
605    }
606
607    /// Set the default header to an inline image.
608    ///
609    /// Creates a header part with an image paragraph. The image is embedded
610    /// in the header part's relationships.
611    pub fn set_header_image(
612        &mut self,
613        image_data: &[u8],
614        image_filename: &str,
615        width: Length,
616        height: Length,
617    ) {
618        self.set_header_footer_image_part(
619            image_data,
620            image_filename,
621            width,
622            height,
623            true,
624            HdrFtrType::Default,
625        );
626    }
627
628    /// Set the default footer to an inline image.
629    pub fn set_footer_image(
630        &mut self,
631        image_data: &[u8],
632        image_filename: &str,
633        width: Length,
634        height: Length,
635    ) {
636        self.set_header_footer_image_part(
637            image_data,
638            image_filename,
639            width,
640            height,
641            false,
642            HdrFtrType::Default,
643        );
644    }
645
646    /// Set a header from raw XML bytes with associated images.
647    ///
648    /// This is useful for copying complex headers from template documents
649    /// that contain grouped shapes, VML, or other elements not easily
650    /// recreated through the high-level API.
651    ///
652    /// Each entry in `images` is `(rel_id, image_data, image_filename)`:
653    /// - `rel_id`: the relationship ID referenced in the header XML (e.g. "rId1")
654    /// - `image_data`: the raw image bytes
655    /// - `image_filename`: used to derive the part name and content type (e.g. "image5.png")
656    pub fn set_raw_header_with_images(
657        &mut self,
658        header_xml: Vec<u8>,
659        images: &[(&str, &[u8], &str)],
660        hdr_type: HdrFtrType,
661    ) {
662        self.set_raw_hdr_ftr_with_images(header_xml, images, true, hdr_type);
663    }
664
665    /// Set a footer from raw XML bytes with associated images.
666    pub fn set_raw_footer_with_images(
667        &mut self,
668        footer_xml: Vec<u8>,
669        images: &[(&str, &[u8], &str)],
670        hdr_type: HdrFtrType,
671    ) {
672        self.set_raw_hdr_ftr_with_images(footer_xml, images, false, hdr_type);
673    }
674
675    fn set_raw_hdr_ftr_with_images(
676        &mut self,
677        xml: Vec<u8>,
678        images: &[(&str, &[u8], &str)],
679        is_header: bool,
680        hdr_type: HdrFtrType,
681    ) {
682        use rdocx_opc::relationship::rel_types;
683
684        let type_suffix = match hdr_type {
685            HdrFtrType::Default => "",
686            HdrFtrType::First => "First",
687            HdrFtrType::Even => "Even",
688        };
689        let (part_name, rel_type, content_type) = if is_header {
690            (
691                format!("/word/header{type_suffix}1.xml"),
692                rel_types::HEADER,
693                "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
694            )
695        } else {
696            (
697                format!("/word/footer{type_suffix}1.xml"),
698                rel_types::FOOTER,
699                "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
700            )
701        };
702
703        // Store the raw header/footer XML
704        self.package.set_part(&part_name, xml);
705        self.package
706            .content_types
707            .add_override(&part_name, content_type);
708
709        // Store each image and create relationships with the specified rel_ids
710        for &(rel_id, image_data, image_filename) in images {
711            let ext = image_filename
712                .rsplit('.')
713                .next()
714                .unwrap_or("png")
715                .to_lowercase();
716            let img_content_type = match ext.as_str() {
717                "png" => "image/png",
718                "jpg" | "jpeg" => "image/jpeg",
719                _ => "image/png",
720            };
721
722            let image_num = self.next_image_number();
723            let img_part_name = format!("/word/media/image{image_num}.{ext}");
724            self.package.set_part(&img_part_name, image_data.to_vec());
725            self.package
726                .content_types
727                .add_default(&ext, img_content_type);
728
729            // Create relationship in the header/footer part's rels with the EXACT rel_id
730            let img_rel_target = format!("media/image{image_num}.{ext}");
731            let hdr_rels = self.package.get_or_create_part_rels(&part_name);
732            hdr_rels.add_with_id(rel_id, rel_types::IMAGE, &img_rel_target);
733        }
734
735        // Add relationship from document to header/footer
736        let rel_target = part_name.trim_start_matches("/word/");
737        let rels = self.package.get_or_create_part_rels(&self.doc_part_name);
738        let rel_id = rels.add(rel_type, rel_target);
739
740        // Add reference in section properties
741        let sect = self.section_properties_mut();
742        let refs = if is_header {
743            &mut sect.header_refs
744        } else {
745            &mut sect.footer_refs
746        };
747
748        refs.retain(|r| r.hdr_ftr_type != hdr_type);
749        refs.push(HdrFtrRef {
750            hdr_ftr_type: hdr_type,
751            rel_id,
752        });
753    }
754
755    /// Set the default header to an inline image with a colored background.
756    ///
757    /// Creates a header part where the paragraph has shading fill set to
758    /// `bg_color` (hex string, e.g. "000000" for black) and contains the
759    /// inline image.
760    pub fn set_header_image_with_background(
761        &mut self,
762        image_data: &[u8],
763        image_filename: &str,
764        width: Length,
765        height: Length,
766        bg_color: &str,
767    ) {
768        self.set_header_footer_image_bg_part(
769            image_data,
770            image_filename,
771            width,
772            height,
773            Some(bg_color),
774            true,
775            HdrFtrType::Default,
776        );
777    }
778
779    /// Set the first-page header to an inline image.
780    pub fn set_first_page_header_image(
781        &mut self,
782        image_data: &[u8],
783        image_filename: &str,
784        width: Length,
785        height: Length,
786    ) {
787        self.set_different_first_page(true);
788        self.set_header_footer_image_part(
789            image_data,
790            image_filename,
791            width,
792            height,
793            true,
794            HdrFtrType::First,
795        );
796    }
797
798    fn set_header_footer_image_part(
799        &mut self,
800        image_data: &[u8],
801        image_filename: &str,
802        width: Length,
803        height: Length,
804        is_header: bool,
805        hdr_type: HdrFtrType,
806    ) {
807        use rdocx_opc::relationship::rel_types;
808
809        // Determine part name based on type
810        let type_suffix = match hdr_type {
811            HdrFtrType::Default => "",
812            HdrFtrType::First => "First",
813            HdrFtrType::Even => "Even",
814        };
815        let (part_name, rel_type, content_type) = if is_header {
816            (
817                format!("/word/header{type_suffix}1.xml"),
818                rel_types::HEADER,
819                "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
820            )
821        } else {
822            (
823                format!("/word/footer{type_suffix}1.xml"),
824                rel_types::FOOTER,
825                "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
826            )
827        };
828
829        // Embed the image in the package
830        let ext = image_filename
831            .rsplit('.')
832            .next()
833            .unwrap_or("png")
834            .to_lowercase();
835        let img_content_type = match ext.as_str() {
836            "png" => "image/png",
837            "jpg" | "jpeg" => "image/jpeg",
838            _ => "image/png",
839        };
840
841        // Generate unique image name using cached counter
842        let image_num = self.next_image_number();
843        let img_part_name = format!("/word/media/image{image_num}.{ext}");
844        self.package.set_part(&img_part_name, image_data.to_vec());
845        self.package
846            .content_types
847            .add_default(&ext, img_content_type);
848
849        // Create image relationship in the HEADER/FOOTER part's rels
850        let img_rel_target = format!("media/image{image_num}.{ext}");
851        let hdr_rels = self.package.get_or_create_part_rels(&part_name);
852        let img_rel_id = hdr_rels.add(rel_types::IMAGE, &img_rel_target);
853
854        // Build header/footer with image paragraph
855        let inline = CT_Inline::new(&img_rel_id, width.to_emu(), height.to_emu());
856        let drawing = CT_Drawing::inline(inline);
857        let run = CT_R {
858            properties: None,
859            content: vec![RunContent::Drawing(drawing)],
860            extra_xml: Vec::new(),
861        };
862
863        let mut hdr_ftr = CT_HdrFtr::new();
864        let mut p = CT_P::new();
865        p.runs.push(run);
866        hdr_ftr.paragraphs.push(p);
867
868        // Serialize
869        let xml = if is_header {
870            hdr_ftr
871                .to_xml_header()
872                .expect("header serialization failed")
873        } else {
874            hdr_ftr
875                .to_xml_footer()
876                .expect("footer serialization failed")
877        };
878
879        self.package.set_part(&part_name, xml);
880        self.package
881            .content_types
882            .add_override(&part_name, content_type);
883
884        // Add relationship from document to header/footer
885        let rel_target = part_name.trim_start_matches("/word/");
886        let rels = self.package.get_or_create_part_rels(&self.doc_part_name);
887        let rel_id = rels.add(rel_type, rel_target);
888
889        // Add reference in section properties
890        let sect = self.section_properties_mut();
891        let refs = if is_header {
892            &mut sect.header_refs
893        } else {
894            &mut sect.footer_refs
895        };
896
897        refs.retain(|r| r.hdr_ftr_type != hdr_type);
898        refs.push(HdrFtrRef {
899            hdr_ftr_type: hdr_type,
900            rel_id,
901        });
902    }
903
904    fn set_header_footer_image_bg_part(
905        &mut self,
906        image_data: &[u8],
907        image_filename: &str,
908        width: Length,
909        height: Length,
910        bg_color: Option<&str>,
911        is_header: bool,
912        hdr_type: HdrFtrType,
913    ) {
914        use rdocx_opc::relationship::rel_types;
915        use rdocx_oxml::properties::CT_Shd;
916
917        // Determine part name based on type
918        let type_suffix = match hdr_type {
919            HdrFtrType::Default => "",
920            HdrFtrType::First => "First",
921            HdrFtrType::Even => "Even",
922        };
923        let (part_name, rel_type, content_type) = if is_header {
924            (
925                format!("/word/header{type_suffix}1.xml"),
926                rel_types::HEADER,
927                "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
928            )
929        } else {
930            (
931                format!("/word/footer{type_suffix}1.xml"),
932                rel_types::FOOTER,
933                "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
934            )
935        };
936
937        // Embed the image in the package
938        let ext = image_filename
939            .rsplit('.')
940            .next()
941            .unwrap_or("png")
942            .to_lowercase();
943        let img_content_type = match ext.as_str() {
944            "png" => "image/png",
945            "jpg" | "jpeg" => "image/jpeg",
946            _ => "image/png",
947        };
948
949        let image_num = self.next_image_number();
950        let img_part_name = format!("/word/media/image{image_num}.{ext}");
951        self.package.set_part(&img_part_name, image_data.to_vec());
952        self.package
953            .content_types
954            .add_default(&ext, img_content_type);
955
956        // Create image relationship in the HEADER/FOOTER part's rels
957        let img_rel_target = format!("media/image{image_num}.{ext}");
958        let hdr_rels = self.package.get_or_create_part_rels(&part_name);
959        let img_rel_id = hdr_rels.add(rel_types::IMAGE, &img_rel_target);
960
961        // Build header/footer with image paragraph
962        let inline = CT_Inline::new(&img_rel_id, width.to_emu(), height.to_emu());
963        let drawing = CT_Drawing::inline(inline);
964        let run = CT_R {
965            properties: None,
966            content: vec![RunContent::Drawing(drawing)],
967            extra_xml: Vec::new(),
968        };
969
970        let mut hdr_ftr = CT_HdrFtr::new();
971        let mut p = CT_P::new();
972        p.runs.push(run);
973
974        // Add background shading if requested
975        if let Some(color) = bg_color {
976            let ppr = CT_PPr {
977                shading: Some(CT_Shd {
978                    val: "clear".to_string(),
979                    color: Some("auto".to_string()),
980                    fill: Some(color.to_string()),
981                }),
982                ..Default::default()
983            };
984            p.properties = Some(ppr);
985        }
986
987        hdr_ftr.paragraphs.push(p);
988
989        // Serialize
990        let xml = if is_header {
991            hdr_ftr
992                .to_xml_header()
993                .expect("header serialization failed")
994        } else {
995            hdr_ftr
996                .to_xml_footer()
997                .expect("footer serialization failed")
998        };
999
1000        self.package.set_part(&part_name, xml);
1001        self.package
1002            .content_types
1003            .add_override(&part_name, content_type);
1004
1005        // Add relationship from document to header/footer
1006        let rel_target = part_name.trim_start_matches("/word/");
1007        let rels = self.package.get_or_create_part_rels(&self.doc_part_name);
1008        let rel_id = rels.add(rel_type, rel_target);
1009
1010        // Add reference in section properties
1011        let sect = self.section_properties_mut();
1012        let refs = if is_header {
1013            &mut sect.header_refs
1014        } else {
1015            &mut sect.footer_refs
1016        };
1017
1018        refs.retain(|r| r.hdr_ftr_type != hdr_type);
1019        refs.push(HdrFtrRef {
1020            hdr_ftr_type: hdr_type,
1021            rel_id,
1022        });
1023    }
1024
1025    fn get_header_footer_text(&self, is_header: bool, hdr_type: HdrFtrType) -> Option<String> {
1026        let sect = self.document.body.sect_pr.as_ref()?;
1027        let refs = if is_header {
1028            &sect.header_refs
1029        } else {
1030            &sect.footer_refs
1031        };
1032        let hdr_ref = refs.iter().find(|r| r.hdr_ftr_type == hdr_type)?;
1033
1034        // Resolve the part
1035        let rels = self.package.get_part_rels(&self.doc_part_name)?;
1036        let rel = rels.get_by_id(&hdr_ref.rel_id)?;
1037        let part_name = OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
1038        let xml = self.package.get_part(&part_name)?;
1039        let hdr_ftr = CT_HdrFtr::from_xml(xml).ok()?;
1040        Some(hdr_ftr.text())
1041    }
1042
1043    // ---- Numbering/Lists ----
1044
1045    /// Ensure a numbering part exists, creating it and its relationship if needed.
1046    fn ensure_numbering(&mut self) -> &mut CT_Numbering {
1047        if self.numbering.is_none() {
1048            self.numbering = Some(CT_Numbering::new());
1049
1050            // Set up numbering relationship and content type
1051            self.package
1052                .get_or_create_part_rels(&self.doc_part_name)
1053                .add(rel_types::NUMBERING, "numbering.xml");
1054            self.package.content_types.add_override(
1055                "/word/numbering.xml",
1056                "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
1057            );
1058        }
1059        self.numbering.as_mut().unwrap()
1060    }
1061
1062    /// Add a bullet list item at the given indentation level (0-based).
1063    ///
1064    /// If no bullet list definition exists yet, one is created automatically.
1065    /// Returns a mutable `Paragraph` for further configuration.
1066    pub fn add_bullet_list_item(&mut self, text: &str, level: u32) -> Paragraph<'_> {
1067        // Find or create a bullet list numId
1068        let num_id = {
1069            let numbering = self.ensure_numbering();
1070            // Look for an existing bullet list
1071            let existing = numbering.nums.iter().find(|n| {
1072                numbering
1073                    .get_abstract_num_for(n.num_id)
1074                    .map(|a| {
1075                        a.levels.first().and_then(|l| l.num_fmt)
1076                            == Some(rdocx_oxml::numbering::ST_NumberFormat::Bullet)
1077                    })
1078                    .unwrap_or(false)
1079            });
1080            if let Some(existing) = existing {
1081                existing.num_id
1082            } else {
1083                numbering.add_bullet_list()
1084            }
1085        };
1086
1087        let mut p = CT_P::new();
1088        if !text.is_empty() {
1089            p.add_run(text);
1090        }
1091        let ppr = CT_PPr {
1092            num_id: Some(num_id),
1093            num_ilvl: Some(level),
1094            ..Default::default()
1095        };
1096        p.properties = Some(ppr);
1097
1098        self.document.body.content.push(BodyContent::Paragraph(p));
1099        match self.document.body.content.last_mut().unwrap() {
1100            BodyContent::Paragraph(p) => Paragraph { inner: p },
1101            _ => unreachable!(),
1102        }
1103    }
1104
1105    /// Add a numbered list item at the given indentation level (0-based).
1106    ///
1107    /// If no numbered list definition exists yet, one is created automatically.
1108    /// Returns a mutable `Paragraph` for further configuration.
1109    pub fn add_numbered_list_item(&mut self, text: &str, level: u32) -> Paragraph<'_> {
1110        // Find or create a numbered list numId
1111        let num_id = {
1112            let numbering = self.ensure_numbering();
1113            // Look for an existing numbered list
1114            let existing = numbering.nums.iter().find(|n| {
1115                numbering
1116                    .get_abstract_num_for(n.num_id)
1117                    .map(|a| {
1118                        a.levels.first().and_then(|l| l.num_fmt)
1119                            == Some(rdocx_oxml::numbering::ST_NumberFormat::Decimal)
1120                    })
1121                    .unwrap_or(false)
1122            });
1123            if let Some(existing) = existing {
1124                existing.num_id
1125            } else {
1126                numbering.add_numbered_list()
1127            }
1128        };
1129
1130        let mut p = CT_P::new();
1131        if !text.is_empty() {
1132            p.add_run(text);
1133        }
1134        let ppr = CT_PPr {
1135            num_id: Some(num_id),
1136            num_ilvl: Some(level),
1137            ..Default::default()
1138        };
1139        p.properties = Some(ppr);
1140
1141        self.document.body.content.push(BodyContent::Paragraph(p));
1142        match self.document.body.content.last_mut().unwrap() {
1143            BodyContent::Paragraph(p) => Paragraph { inner: p },
1144            _ => unreachable!(),
1145        }
1146    }
1147
1148    // ---- Style access ----
1149
1150    /// Get all styles.
1151    pub fn styles(&self) -> Vec<Style<'_>> {
1152        self.styles
1153            .styles
1154            .iter()
1155            .map(|s| Style { inner: s })
1156            .collect()
1157    }
1158
1159    /// Find a style by its ID.
1160    pub fn style(&self, style_id: &str) -> Option<Style<'_>> {
1161        self.styles.get_by_id(style_id).map(|s| Style { inner: s })
1162    }
1163
1164    // ---- Style manipulation ----
1165
1166    /// Add a custom style to the document.
1167    pub fn add_style(&mut self, builder: StyleBuilder) {
1168        self.styles.styles.push(builder.build());
1169    }
1170
1171    /// Resolve the effective paragraph properties for a given style ID,
1172    /// walking the full inheritance chain (docDefaults → basedOn → ...).
1173    pub fn resolve_paragraph_properties(&self, style_id: Option<&str>) -> CT_PPr {
1174        style::resolve_paragraph_properties(style_id, &self.styles)
1175    }
1176
1177    /// Resolve the effective run properties for the given paragraph and character styles,
1178    /// walking the full inheritance chain.
1179    pub fn resolve_run_properties(
1180        &self,
1181        para_style_id: Option<&str>,
1182        run_style_id: Option<&str>,
1183    ) -> CT_RPr {
1184        style::resolve_run_properties(para_style_id, run_style_id, &self.styles)
1185    }
1186
1187    // ---- Section/Page setup ----
1188
1189    /// Get the section properties (page size, margins).
1190    pub fn section_properties(&self) -> Option<&CT_SectPr> {
1191        self.document.body.sect_pr.as_ref()
1192    }
1193
1194    /// Get a mutable reference to section properties, creating defaults if needed.
1195    pub fn section_properties_mut(&mut self) -> &mut CT_SectPr {
1196        self.document
1197            .body
1198            .sect_pr
1199            .get_or_insert_with(CT_SectPr::default_letter)
1200    }
1201
1202    /// Set page size.
1203    pub fn set_page_size(&mut self, width: Length, height: Length) {
1204        let sect = self.section_properties_mut();
1205        sect.page_width = Some(width.as_twips());
1206        sect.page_height = Some(height.as_twips());
1207    }
1208
1209    /// Set page orientation to landscape (swaps width and height if needed).
1210    pub fn set_landscape(&mut self) {
1211        let sect = self.section_properties_mut();
1212        sect.orientation = Some(ST_PageOrientation::Landscape);
1213        // Swap width/height if portrait dimensions
1214        if let (Some(w), Some(h)) = (sect.page_width, sect.page_height)
1215            && w.0 < h.0
1216        {
1217            sect.page_width = Some(h);
1218            sect.page_height = Some(w);
1219        }
1220    }
1221
1222    /// Set page orientation to portrait (swaps width and height if needed).
1223    pub fn set_portrait(&mut self) {
1224        let sect = self.section_properties_mut();
1225        sect.orientation = Some(ST_PageOrientation::Portrait);
1226        // Swap width/height if landscape dimensions
1227        if let (Some(w), Some(h)) = (sect.page_width, sect.page_height)
1228            && w.0 > h.0
1229        {
1230            sect.page_width = Some(h);
1231            sect.page_height = Some(w);
1232        }
1233    }
1234
1235    /// Set all page margins.
1236    pub fn set_margins(&mut self, top: Length, right: Length, bottom: Length, left: Length) {
1237        let sect = self.section_properties_mut();
1238        sect.margin_top = Some(top.as_twips());
1239        sect.margin_right = Some(right.as_twips());
1240        sect.margin_bottom = Some(bottom.as_twips());
1241        sect.margin_left = Some(left.as_twips());
1242    }
1243
1244    /// Set equal-width column layout.
1245    pub fn set_columns(&mut self, num: u32, spacing: Length) {
1246        let sect = self.section_properties_mut();
1247        sect.columns = Some(CT_Columns {
1248            num: Some(num),
1249            space: Some(spacing.as_twips()),
1250            equal_width: Some(true),
1251            sep: None,
1252            columns: Vec::new(),
1253        });
1254    }
1255
1256    /// Set header and footer distances from page edges.
1257    pub fn set_header_footer_distance(&mut self, header: Length, footer: Length) {
1258        let sect = self.section_properties_mut();
1259        sect.header_distance = Some(header.as_twips());
1260        sect.footer_distance = Some(footer.as_twips());
1261    }
1262
1263    /// Set the gutter margin.
1264    pub fn set_gutter(&mut self, gutter: Length) {
1265        self.section_properties_mut().gutter = Some(gutter.as_twips());
1266    }
1267
1268    /// Enable or disable different first page header/footer.
1269    pub fn set_different_first_page(&mut self, val: bool) {
1270        self.section_properties_mut().title_pg = Some(val);
1271    }
1272
1273    // ---- Metadata access ----
1274
1275    /// Get the document title.
1276    pub fn title(&self) -> Option<&str> {
1277        self.core_properties.as_ref()?.title.as_deref()
1278    }
1279
1280    /// Set the document title.
1281    pub fn set_title(&mut self, title: &str) {
1282        self.ensure_core_properties().title = Some(title.to_string());
1283    }
1284
1285    /// Get the document author/creator.
1286    pub fn author(&self) -> Option<&str> {
1287        self.core_properties.as_ref()?.creator.as_deref()
1288    }
1289
1290    /// Set the document author/creator.
1291    pub fn set_author(&mut self, author: &str) {
1292        self.ensure_core_properties().creator = Some(author.to_string());
1293    }
1294
1295    /// Get the document subject.
1296    pub fn subject(&self) -> Option<&str> {
1297        self.core_properties.as_ref()?.subject.as_deref()
1298    }
1299
1300    /// Set the document subject.
1301    pub fn set_subject(&mut self, subject: &str) {
1302        self.ensure_core_properties().subject = Some(subject.to_string());
1303    }
1304
1305    /// Get the document keywords.
1306    pub fn keywords(&self) -> Option<&str> {
1307        self.core_properties.as_ref()?.keywords.as_deref()
1308    }
1309
1310    /// Set the document keywords.
1311    pub fn set_keywords(&mut self, keywords: &str) {
1312        self.ensure_core_properties().keywords = Some(keywords.to_string());
1313    }
1314
1315    fn ensure_core_properties(&mut self) -> &mut CoreProperties {
1316        self.core_properties
1317            .get_or_insert_with(CoreProperties::default)
1318    }
1319
1320    // ---- Document Merging ----
1321
1322    /// Append the content of another document to this document.
1323    ///
1324    /// Copies all body content (paragraphs and tables) from the other document.
1325    /// Handles style deduplication and numbering remapping.
1326    pub fn append(&mut self, other: &Document) {
1327        self.merge_styles(other);
1328
1329        let start_idx = self.document.body.content.len();
1330        for content in &other.document.body.content {
1331            self.document.body.content.push(content.clone());
1332        }
1333
1334        self.remap_merged_numbering(other, start_idx);
1335    }
1336
1337    /// Append the content of another document with a section break.
1338    pub fn append_with_break(&mut self, other: &Document, break_type: crate::SectionBreak) {
1339        // Insert a section break paragraph before the merged content
1340        let mut p = CT_P::new();
1341        let sect_pr = match break_type {
1342            crate::SectionBreak::NextPage => CT_SectPr::default_letter(),
1343            crate::SectionBreak::Continuous => {
1344                let mut sp = CT_SectPr::default_letter();
1345                sp.section_type = Some(ST_SectionType::Continuous);
1346                sp
1347            }
1348            crate::SectionBreak::EvenPage => {
1349                let mut sp = CT_SectPr::default_letter();
1350                sp.section_type = Some(ST_SectionType::EvenPage);
1351                sp
1352            }
1353            crate::SectionBreak::OddPage => {
1354                let mut sp = CT_SectPr::default_letter();
1355                sp.section_type = Some(ST_SectionType::OddPage);
1356                sp
1357            }
1358        };
1359        p.properties = Some(CT_PPr {
1360            sect_pr: Some(sect_pr),
1361            ..Default::default()
1362        });
1363        self.document.body.content.push(BodyContent::Paragraph(p));
1364
1365        self.append(other);
1366    }
1367
1368    /// Insert the content of another document at a specified body index.
1369    pub fn insert_document(&mut self, index: usize, other: &Document) {
1370        self.merge_styles(other);
1371
1372        let insert_at = index.min(self.document.body.content.len());
1373        for (i, content) in other.document.body.content.iter().enumerate() {
1374            self.document
1375                .body
1376                .content
1377                .insert(insert_at + i, content.clone());
1378        }
1379
1380        self.remap_merged_numbering(other, insert_at);
1381    }
1382
1383    /// Merge styles from another document, avoiding duplicates.
1384    fn merge_styles(&mut self, other: &Document) {
1385        for style in &other.styles.styles {
1386            if self.styles.get_by_id(&style.style_id).is_none() {
1387                self.styles.styles.push(style.clone());
1388            }
1389        }
1390    }
1391
1392    /// Merge numbering from another document and remap IDs in the merged content.
1393    /// `start_idx` is the index where the other document's content starts in self.
1394    fn remap_merged_numbering(&mut self, other: &Document, start_idx: usize) {
1395        let Some(other_numbering) = &other.numbering else {
1396            return;
1397        };
1398
1399        let numbering = self
1400            .numbering
1401            .get_or_insert_with(|| rdocx_oxml::numbering::CT_Numbering {
1402                abstract_nums: Vec::new(),
1403                nums: Vec::new(),
1404            });
1405
1406        // Find max existing IDs to avoid collision
1407        let max_abstract_id = numbering
1408            .abstract_nums
1409            .iter()
1410            .map(|a| a.abstract_num_id)
1411            .max()
1412            .unwrap_or(0);
1413        let max_num_id = numbering.nums.iter().map(|n| n.num_id).max().unwrap_or(0);
1414
1415        let abstract_offset = max_abstract_id + 1;
1416        let num_offset = max_num_id + 1;
1417
1418        // Copy abstract nums with remapped IDs
1419        for abs_num in &other_numbering.abstract_nums {
1420            let mut new_abs = abs_num.clone();
1421            new_abs.abstract_num_id += abstract_offset;
1422            numbering.abstract_nums.push(new_abs);
1423        }
1424
1425        // Copy num instances with remapped IDs
1426        for num in &other_numbering.nums {
1427            let mut new_num = num.clone();
1428            new_num.num_id += num_offset;
1429            new_num.abstract_num_id += abstract_offset;
1430            numbering.nums.push(new_num);
1431        }
1432
1433        // Remap numId references in the merged content
1434        let incoming_count = other.document.body.content.len();
1435        for content in self.document.body.content[start_idx..start_idx + incoming_count].iter_mut()
1436        {
1437            Self::remap_num_ids(content, num_offset);
1438        }
1439    }
1440
1441    /// Remap numId references in body content by adding an offset.
1442    fn remap_num_ids(content: &mut BodyContent, offset: u32) {
1443        match content {
1444            BodyContent::Paragraph(p) => {
1445                Self::remap_paragraph_num_id(p, offset);
1446            }
1447            BodyContent::Table(tbl) => {
1448                Self::remap_table_num_ids(tbl, offset);
1449            }
1450            BodyContent::RawXml(_) => {}
1451        }
1452    }
1453
1454    fn remap_paragraph_num_id(p: &mut CT_P, offset: u32) {
1455        if let Some(ppr) = &mut p.properties
1456            && let Some(num_id) = &mut ppr.num_id
1457            && *num_id > 0
1458        {
1459            *num_id += offset;
1460        }
1461    }
1462
1463    fn remap_table_num_ids(tbl: &mut CT_Tbl, offset: u32) {
1464        for row in &mut tbl.rows {
1465            for cell in &mut row.cells {
1466                for cc in &mut cell.content {
1467                    match cc {
1468                        rdocx_oxml::table::CellContent::Paragraph(p) => {
1469                            Self::remap_paragraph_num_id(p, offset);
1470                        }
1471                        rdocx_oxml::table::CellContent::Table(nested) => {
1472                            Self::remap_table_num_ids(nested, offset);
1473                        }
1474                    }
1475                }
1476            }
1477        }
1478    }
1479
1480    // ---- Table of Contents ----
1481
1482    /// Insert a Table of Contents at the given body content index.
1483    ///
1484    /// Scans the document for heading paragraphs (Heading1..HeadingN where N <= max_level),
1485    /// inserts bookmark markers at each heading, and generates TOC entry paragraphs
1486    /// with internal hyperlinks and dot-leader tab stops.
1487    ///
1488    /// # Arguments
1489    /// * `index` - Body content index at which to insert the TOC
1490    /// * `max_level` - Maximum heading level to include (1-9, typically 3)
1491    pub fn insert_toc(&mut self, index: usize, max_level: u32) {
1492        use rdocx_oxml::borders::{CT_TabStop, CT_Tabs};
1493        use rdocx_oxml::shared::{ST_TabJc, ST_TabLeader};
1494        use rdocx_oxml::text::HyperlinkSpan;
1495        use rdocx_oxml::units::Twips;
1496
1497        let max_level = max_level.clamp(1, 9);
1498
1499        // Step 1: Collect heading info from the document body
1500        struct HeadingInfo {
1501            content_index: usize,
1502            level: u32,
1503            text: String,
1504            bookmark_name: String,
1505        }
1506
1507        let mut headings = Vec::new();
1508        let mut toc_counter = 0u32;
1509
1510        for (idx, content) in self.document.body.content.iter().enumerate() {
1511            if let BodyContent::Paragraph(p) = content
1512                && let Some(level) = Self::detect_heading_level_for_toc(p)
1513                && level <= max_level
1514            {
1515                let text = p.text();
1516                if !text.trim().is_empty() {
1517                    toc_counter += 1;
1518                    headings.push(HeadingInfo {
1519                        content_index: idx,
1520                        level,
1521                        text,
1522                        bookmark_name: format!("_Toc{toc_counter}"),
1523                    });
1524                }
1525            }
1526        }
1527
1528        // Step 2: Insert bookmark markers at each heading paragraph (as raw XML in extra_xml)
1529        // We insert bookmarkStart/bookmarkEnd as extra_xml at position 0 in the paragraph.
1530        // Adjust for insertions that shift indices.
1531        let mut bookmark_id = 100; // Start at a high ID to avoid collision
1532        for heading in &headings {
1533            if let Some(BodyContent::Paragraph(p)) =
1534                self.document.body.content.get_mut(heading.content_index)
1535            {
1536                let bm_start = format!(
1537                    "<w:bookmarkStart w:id=\"{bookmark_id}\" w:name=\"{}\"/>",
1538                    heading.bookmark_name
1539                );
1540                let bm_end = format!("<w:bookmarkEnd w:id=\"{bookmark_id}\"/>");
1541                // Insert at position 0 (before runs)
1542                p.extra_xml.push((0, bm_start.into_bytes()));
1543                // Insert at end (after runs)
1544                p.extra_xml.push((p.runs.len(), bm_end.into_bytes()));
1545                bookmark_id += 1;
1546            }
1547        }
1548
1549        // Step 3: Build TOC entry paragraphs
1550        // Right margin tab stop at 9360 twips (6.5") with dot leader
1551        let right_tab = CT_Tabs {
1552            tabs: vec![CT_TabStop {
1553                val: ST_TabJc::Right,
1554                pos: Twips(9360),
1555                leader: Some(ST_TabLeader::Dot),
1556            }],
1557        };
1558
1559        let mut toc_paragraphs: Vec<CT_P> = Vec::new();
1560
1561        // TOC title
1562        let mut title_p = CT_P::new();
1563        let mut title_r = CT_R::new("Table of Contents");
1564        title_r.properties = Some(CT_RPr {
1565            bold: Some(true),
1566            ..Default::default()
1567        });
1568        title_p.runs.push(title_r);
1569        title_p.properties = Some(CT_PPr {
1570            space_after: Some(Twips(120)),
1571            ..Default::default()
1572        });
1573        toc_paragraphs.push(title_p);
1574
1575        for heading in &headings {
1576            let mut p = CT_P::new();
1577
1578            // Indentation based on heading level (each level indented 360 twips = 0.25")
1579            let indent = Twips(360 * (heading.level as i32 - 1));
1580
1581            p.properties = Some(CT_PPr {
1582                tabs: Some(right_tab.clone()),
1583                ind_left: if indent.0 > 0 { Some(indent) } else { None },
1584                ..Default::default()
1585            });
1586
1587            // Run with heading text
1588            let text_run = CT_R::new(&heading.text);
1589            p.runs.push(text_run);
1590
1591            // Tab run (separates text from page number)
1592            p.runs.push(CT_R {
1593                properties: None,
1594                content: vec![rdocx_oxml::text::RunContent::Tab],
1595                extra_xml: Vec::new(),
1596            });
1597
1598            // Wrap the text run in a hyperlink to the bookmark
1599            p.hyperlinks.push(HyperlinkSpan {
1600                rel_id: None,
1601                anchor: Some(heading.bookmark_name.clone()),
1602                run_start: 0,
1603                run_end: 1, // Just the text run, not the tab
1604            });
1605
1606            toc_paragraphs.push(p);
1607        }
1608
1609        // Step 4: Insert TOC paragraphs at the specified index
1610        let insert_at = index.min(self.document.body.content.len());
1611        for (i, p) in toc_paragraphs.into_iter().enumerate() {
1612            self.document
1613                .body
1614                .content
1615                .insert(insert_at + i, BodyContent::Paragraph(p));
1616        }
1617    }
1618
1619    /// Detect heading level from a paragraph's style ID.
1620    fn detect_heading_level_for_toc(para: &CT_P) -> Option<u32> {
1621        let ppr = para.properties.as_ref()?;
1622        let style_id = ppr.style_id.as_deref()?;
1623        let rest = style_id.strip_prefix("Heading")?;
1624        rest.parse::<u32>().ok().filter(|n| (1..=9).contains(n))
1625    }
1626
1627    // ---- Placeholder replacement ----
1628
1629    /// Replace all occurrences of `placeholder` with `replacement` throughout the document.
1630    ///
1631    /// Searches body paragraphs, tables (including nested), headers, and footers.
1632    /// Handles placeholders split across multiple runs. Returns the total number
1633    /// of replacements made.
1634    pub fn replace_text(&mut self, placeholder: &str, replacement: &str) -> usize {
1635        use rdocx_oxml::placeholder;
1636
1637        let mut count = 0;
1638
1639        // Replace in body paragraphs
1640        for content in &mut self.document.body.content {
1641            match content {
1642                BodyContent::Paragraph(p) => {
1643                    count += placeholder::replace_in_paragraph(p, placeholder, replacement);
1644                }
1645                BodyContent::Table(t) => {
1646                    count += placeholder::replace_in_table(t, placeholder, replacement);
1647                }
1648                _ => {} // Skip RawXml elements
1649            }
1650        }
1651
1652        // Replace in headers and footers
1653        if let Some(sect_pr) = self.document.body.sect_pr.as_ref() {
1654            let hdr_rel_ids: Vec<String> = sect_pr
1655                .header_refs
1656                .iter()
1657                .map(|r| r.rel_id.clone())
1658                .collect();
1659            let ftr_rel_ids: Vec<String> = sect_pr
1660                .footer_refs
1661                .iter()
1662                .map(|r| r.rel_id.clone())
1663                .collect();
1664
1665            for rel_id in hdr_rel_ids {
1666                if let Some(mut hf) = self.load_header_footer(&rel_id) {
1667                    let n =
1668                        placeholder::replace_in_header_footer(&mut hf, placeholder, replacement);
1669                    if n > 0 {
1670                        self.save_header_footer(&rel_id, &hf, true);
1671                        count += n;
1672                    }
1673                }
1674            }
1675            for rel_id in ftr_rel_ids {
1676                if let Some(mut hf) = self.load_header_footer(&rel_id) {
1677                    let n =
1678                        placeholder::replace_in_header_footer(&mut hf, placeholder, replacement);
1679                    if n > 0 {
1680                        self.save_header_footer(&rel_id, &hf, false);
1681                        count += n;
1682                    }
1683                }
1684            }
1685        }
1686
1687        // Flush document to package, then do raw XML pass for text boxes/shapes
1688        if let Ok(()) = self.flush_to_package() {
1689            count += self.replace_in_xml_parts(placeholder, replacement);
1690        }
1691
1692        count
1693    }
1694
1695    /// Replace multiple placeholders at once. Returns total replacements.
1696    pub fn replace_all(&mut self, replacements: &std::collections::HashMap<&str, &str>) -> usize {
1697        let mut count = 0;
1698        for (placeholder, replacement) in replacements {
1699            count += self.replace_text(placeholder, replacement);
1700        }
1701        count
1702    }
1703
1704    // ---- Regex replacement ----
1705
1706    /// Replace all regex matches with `replacement` throughout the document.
1707    ///
1708    /// The `replacement` string supports capture groups: `$1`, `$2`, etc.
1709    /// Searches body paragraphs, tables (including nested), headers, and footers.
1710    /// Returns the total number of replacements made, or an error if the regex is invalid.
1711    pub fn replace_regex(&mut self, pattern: &str, replacement: &str) -> Result<usize> {
1712        let re =
1713            regex::Regex::new(pattern).map_err(|e| Error::Other(format!("invalid regex: {e}")))?;
1714        Ok(self.replace_regex_compiled(&re, replacement))
1715    }
1716
1717    /// Replace multiple regex patterns at once. Returns total replacements.
1718    pub fn replace_all_regex(&mut self, patterns: &[(String, String)]) -> Result<usize> {
1719        let mut count = 0;
1720        for (pattern, replacement) in patterns {
1721            count += self.replace_regex(pattern, replacement)?;
1722        }
1723        Ok(count)
1724    }
1725
1726    /// Internal: replace using a pre-compiled regex.
1727    fn replace_regex_compiled(&mut self, re: &regex::Regex, replacement: &str) -> usize {
1728        use rdocx_oxml::placeholder;
1729
1730        let mut count = 0;
1731
1732        // Replace in body paragraphs and tables
1733        for content in &mut self.document.body.content {
1734            match content {
1735                BodyContent::Paragraph(p) => {
1736                    count += placeholder::replace_regex_in_paragraph(p, re, replacement);
1737                }
1738                BodyContent::Table(t) => {
1739                    count += placeholder::replace_regex_in_table(t, re, replacement);
1740                }
1741                _ => {}
1742            }
1743        }
1744
1745        // Replace in headers and footers
1746        if let Some(sect_pr) = self.document.body.sect_pr.as_ref() {
1747            let hdr_rel_ids: Vec<String> = sect_pr
1748                .header_refs
1749                .iter()
1750                .map(|r| r.rel_id.clone())
1751                .collect();
1752            let ftr_rel_ids: Vec<String> = sect_pr
1753                .footer_refs
1754                .iter()
1755                .map(|r| r.rel_id.clone())
1756                .collect();
1757
1758            for rel_id in hdr_rel_ids {
1759                if let Some(mut hf) = self.load_header_footer(&rel_id) {
1760                    let n = placeholder::replace_regex_in_header_footer(&mut hf, re, replacement);
1761                    if n > 0 {
1762                        self.save_header_footer(&rel_id, &hf, true);
1763                        count += n;
1764                    }
1765                }
1766            }
1767            for rel_id in ftr_rel_ids {
1768                if let Some(mut hf) = self.load_header_footer(&rel_id) {
1769                    let n = placeholder::replace_regex_in_header_footer(&mut hf, re, replacement);
1770                    if n > 0 {
1771                        self.save_header_footer(&rel_id, &hf, false);
1772                        count += n;
1773                    }
1774                }
1775            }
1776        }
1777
1778        count
1779    }
1780
1781    /// Load a header/footer part by its relationship ID.
1782    fn load_header_footer(&self, rel_id: &str) -> Option<CT_HdrFtr> {
1783        let rels = self.package.get_part_rels(&self.doc_part_name)?;
1784        let rel = rels.get_by_id(rel_id)?;
1785        let part_name = OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
1786        let xml = self.package.get_part(&part_name)?;
1787        CT_HdrFtr::from_xml(xml).ok()
1788    }
1789
1790    /// Run raw XML replacement on all XML parts (for text boxes, shapes, charts, etc.).
1791    ///
1792    /// This is called after the typed-model replacement and flush_to_package.
1793    fn replace_in_xml_parts(&mut self, placeholder: &str, replacement: &str) -> usize {
1794        use rdocx_oxml::placeholder::{replace_in_chart_xml, replace_in_xml_part};
1795
1796        let mut count = 0;
1797
1798        // Collect part names for XML parts to process (text boxes/shapes)
1799        let mut xml_parts: Vec<String> = vec![self.doc_part_name.clone()];
1800        if let Some(sect_pr) = self.document.body.sect_pr.as_ref()
1801            && let Some(rels) = self.package.get_part_rels(&self.doc_part_name)
1802        {
1803            for href in &sect_pr.header_refs {
1804                if let Some(rel) = rels.get_by_id(&href.rel_id) {
1805                    xml_parts.push(OpcPackage::resolve_rel_target(
1806                        &self.doc_part_name,
1807                        &rel.target,
1808                    ));
1809                }
1810            }
1811            for fref in &sect_pr.footer_refs {
1812                if let Some(rel) = rels.get_by_id(&fref.rel_id) {
1813                    xml_parts.push(OpcPackage::resolve_rel_target(
1814                        &self.doc_part_name,
1815                        &rel.target,
1816                    ));
1817                }
1818            }
1819        }
1820
1821        for part_name in xml_parts {
1822            if let Some(xml) = self.package.get_part(&part_name) {
1823                let xml = xml.to_vec();
1824                if let Ok((new_xml, n)) = replace_in_xml_part(&xml, placeholder, replacement)
1825                    && n > 0
1826                {
1827                    self.package.set_part(&part_name, new_xml);
1828                    count += n;
1829                }
1830            }
1831        }
1832
1833        // Collect chart part names
1834        let chart_parts: Vec<String> = self
1835            .package
1836            .get_part_rels(&self.doc_part_name)
1837            .map(|rels| {
1838                rels.get_all_by_type(rel_types::CHART)
1839                    .iter()
1840                    .map(|rel| OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target))
1841                    .collect()
1842            })
1843            .unwrap_or_default();
1844
1845        for part_name in chart_parts {
1846            if let Some(xml) = self.package.get_part(&part_name) {
1847                let xml = xml.to_vec();
1848                if let Ok((new_xml, n)) = replace_in_chart_xml(&xml, placeholder, replacement)
1849                    && n > 0
1850                {
1851                    self.package.set_part(&part_name, new_xml);
1852                    count += n;
1853                }
1854            }
1855        }
1856
1857        // Re-parse document from the (possibly modified) package XML
1858        if count > 0
1859            && let Some(doc_xml) = self.package.get_part(&self.doc_part_name)
1860            && let Ok(doc) = CT_Document::from_xml(doc_xml)
1861        {
1862            self.document = doc;
1863        }
1864
1865        count
1866    }
1867
1868    // ---- PDF conversion ----
1869
1870    /// Render the document to PDF bytes.
1871    ///
1872    /// This performs a full layout pass (font shaping, line breaking, pagination)
1873    /// and then renders the result to a PDF document.
1874    ///
1875    /// Font resolution order:
1876    /// 1. Fonts embedded in the DOCX file (word/fonts/)
1877    /// 2. System fonts
1878    /// 3. Bundled fonts (if `bundled-fonts` feature is enabled)
1879    pub fn to_pdf(&self) -> Result<Vec<u8>> {
1880        self.to_pdf_with_fonts(&[])
1881    }
1882
1883    /// Render the document to PDF bytes with user-provided font files.
1884    ///
1885    /// User-provided fonts take highest priority in font resolution.
1886    ///
1887    /// # Arguments
1888    /// * `font_files` - Additional font files to use. Each entry is `(family_name, font_bytes)`.
1889    ///
1890    /// Font resolution order:
1891    /// 1. User-provided fonts (this parameter)
1892    /// 2. Fonts embedded in the DOCX file (word/fonts/)
1893    /// 3. System fonts
1894    /// 4. Bundled fonts (if `bundled-fonts` feature is enabled)
1895    pub fn to_pdf_with_fonts(&self, font_files: &[(&str, &[u8])]) -> Result<Vec<u8>> {
1896        let mut input = self.build_layout_input();
1897        for (family, data) in font_files {
1898            input.fonts.push(rdocx_layout::FontFile {
1899                family: family.to_string(),
1900                data: data.to_vec(),
1901            });
1902        }
1903        let layout = rdocx_layout::layout_document(&input)?;
1904        Ok(rdocx_pdf::render_to_pdf(&layout))
1905    }
1906
1907    /// Save the document as a PDF file.
1908    pub fn save_pdf<P: AsRef<Path>>(&self, path: P) -> Result<()> {
1909        let pdf_bytes = self.to_pdf()?;
1910        std::fs::write(path, pdf_bytes)?;
1911        Ok(())
1912    }
1913
1914    /// Convert the document to a complete HTML document string.
1915    pub fn to_html(&self) -> String {
1916        let input = self.build_html_input();
1917        rdocx_html::to_html_document(&input, &rdocx_html::HtmlOptions::default())
1918    }
1919
1920    /// Convert the document to an HTML fragment (body content only, no `<html>` wrapper).
1921    pub fn to_html_fragment(&self) -> String {
1922        let input = self.build_html_input();
1923        rdocx_html::to_html_fragment(&input, &rdocx_html::HtmlOptions::default())
1924    }
1925
1926    /// Convert the document to Markdown.
1927    pub fn to_markdown(&self) -> String {
1928        let input = self.build_html_input();
1929        rdocx_html::to_markdown(&input)
1930    }
1931
1932    /// Build an HtmlInput from the document's current state.
1933    fn build_html_input(&self) -> rdocx_html::HtmlInput {
1934        use rdocx_opc::relationship::rel_types;
1935        use std::collections::HashMap;
1936
1937        let mut images: HashMap<String, rdocx_html::ImageData> = HashMap::new();
1938        let mut hyperlink_urls: HashMap<String, String> = HashMap::new();
1939
1940        if let Some(rels) = self.package.get_part_rels(&self.doc_part_name) {
1941            for rel in &rels.items {
1942                match rel.rel_type.as_str() {
1943                    t if t == rel_types::IMAGE => {
1944                        let part_name =
1945                            OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
1946                        if let Some(data) = self.package.get_part(&part_name) {
1947                            let content_type = guess_image_content_type(&part_name);
1948                            images.insert(
1949                                rel.id.clone(),
1950                                rdocx_html::ImageData {
1951                                    data: data.to_vec(),
1952                                    content_type,
1953                                },
1954                            );
1955                        }
1956                    }
1957                    t if t == rel_types::HYPERLINK => {
1958                        if rel.target_mode.as_ref().is_some_and(|m| m == "External") {
1959                            hyperlink_urls.insert(rel.id.clone(), rel.target.clone());
1960                        }
1961                    }
1962                    _ => {}
1963                }
1964            }
1965        }
1966
1967        rdocx_html::HtmlInput {
1968            document: self.document.clone(),
1969            styles: self.styles.clone(),
1970            numbering: self.numbering.clone(),
1971            images,
1972            hyperlink_urls,
1973        }
1974    }
1975
1976    /// Render a single page of the document to PNG bytes.
1977    ///
1978    /// # Arguments
1979    /// * `page_index` - 0-based page index
1980    /// * `dpi` - Resolution (72 = 1:1, 150 = standard, 300 = high quality)
1981    pub fn render_page_to_png(&self, page_index: usize, dpi: f64) -> Result<Option<Vec<u8>>> {
1982        let input = self.build_layout_input();
1983        let layout = rdocx_layout::layout_document(&input)?;
1984        Ok(rdocx_pdf::render_page_to_png(&layout, page_index, dpi))
1985    }
1986
1987    /// Render all pages of the document to PNG bytes.
1988    pub fn render_all_pages(&self, dpi: f64) -> Result<Vec<Vec<u8>>> {
1989        let input = self.build_layout_input();
1990        let layout = rdocx_layout::layout_document(&input)?;
1991        Ok(rdocx_pdf::render_all_pages(&layout, dpi))
1992    }
1993
1994    /// Build a LayoutInput from the document's current state.
1995    fn build_layout_input(&self) -> rdocx_layout::LayoutInput {
1996        use rdocx_layout::{ImageData, LayoutInput};
1997        use rdocx_opc::relationship::rel_types;
1998        use std::collections::HashMap;
1999
2000        let mut headers: HashMap<String, CT_HdrFtr> = HashMap::new();
2001        let mut footers: HashMap<String, CT_HdrFtr> = HashMap::new();
2002        let mut images: HashMap<String, ImageData> = HashMap::new();
2003        let mut hyperlink_urls: HashMap<String, String> = HashMap::new();
2004        let mut footnotes = None;
2005        let mut endnotes = None;
2006
2007        // Extract embedded fonts from the DOCX package
2008        let fonts = self.extract_embedded_fonts();
2009
2010        if let Some(rels) = self.package.get_part_rels(&self.doc_part_name) {
2011            for rel in &rels.items {
2012                match rel.rel_type.as_str() {
2013                    t if t == rel_types::HEADER => {
2014                        let part_name =
2015                            OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
2016                        if let Some(xml) = self.package.get_part(&part_name)
2017                            && let Ok(hf) = CT_HdrFtr::from_xml(xml)
2018                        {
2019                            headers.insert(rel.id.clone(), hf);
2020                        }
2021                    }
2022                    t if t == rel_types::FOOTER => {
2023                        let part_name =
2024                            OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
2025                        if let Some(xml) = self.package.get_part(&part_name)
2026                            && let Ok(hf) = CT_HdrFtr::from_xml(xml)
2027                        {
2028                            footers.insert(rel.id.clone(), hf);
2029                        }
2030                    }
2031                    t if t == rel_types::IMAGE => {
2032                        let part_name =
2033                            OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
2034                        if let Some(data) = self.package.get_part(&part_name) {
2035                            let content_type = guess_image_content_type(&part_name);
2036                            images.insert(
2037                                rel.id.clone(),
2038                                ImageData {
2039                                    data: data.to_vec(),
2040                                    content_type,
2041                                },
2042                            );
2043                        }
2044                    }
2045                    t if t == rel_types::HYPERLINK => {
2046                        if rel.target_mode.as_ref().is_some_and(|m| m == "External") {
2047                            hyperlink_urls.insert(rel.id.clone(), rel.target.clone());
2048                        }
2049                    }
2050                    t if t == rel_types::FOOTNOTES => {
2051                        let part_name =
2052                            OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
2053                        if let Some(xml) = self.package.get_part(&part_name) {
2054                            footnotes = rdocx_oxml::footnotes::CT_Footnotes::from_xml(xml).ok();
2055                        }
2056                    }
2057                    t if t == rel_types::ENDNOTES => {
2058                        let part_name =
2059                            OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
2060                        if let Some(xml) = self.package.get_part(&part_name) {
2061                            endnotes = rdocx_oxml::footnotes::CT_Footnotes::from_xml(xml).ok();
2062                        }
2063                    }
2064                    _ => {}
2065                }
2066            }
2067        }
2068
2069        // Parse theme if available
2070        let theme = self
2071            .package
2072            .get_part("/word/theme/theme1.xml")
2073            .and_then(|data| rdocx_oxml::theme::Theme::from_xml(data).ok());
2074
2075        LayoutInput {
2076            document: self.document.clone(),
2077            styles: self.styles.clone(),
2078            numbering: self.numbering.clone(),
2079            headers,
2080            footers,
2081            images,
2082            core_properties: self.core_properties.clone(),
2083            hyperlink_urls,
2084            footnotes,
2085            endnotes,
2086            theme,
2087            fonts,
2088        }
2089    }
2090
2091    /// Extract embedded fonts from the DOCX package.
2092    ///
2093    /// Word can embed fonts as `.odttf` (obfuscated TrueType) or regular `.ttf`/`.otf`
2094    /// files in the `word/fonts/` directory. ODTTF files have the first 32 bytes
2095    /// XOR'd with a 16-byte GUID derived from the font's relationship ID.
2096    fn extract_embedded_fonts(&self) -> Vec<rdocx_layout::FontFile> {
2097        let mut fonts = Vec::new();
2098
2099        // Look for font parts in word/fonts/ directory
2100        for (part_name, data) in &self.package.parts {
2101            let lower = part_name.to_lowercase();
2102            if !lower.contains("/word/fonts/") && !lower.contains("/word/font") {
2103                continue;
2104            }
2105
2106            // Determine font family name from the file name
2107            let file_name = part_name.rsplit('/').next().unwrap_or(part_name);
2108            let family = file_name.split('.').next().unwrap_or(file_name).to_string();
2109
2110            if lower.ends_with(".odttf") {
2111                // Deobfuscate ODTTF: XOR first 32 bytes with GUID from the file name
2112                if let Some(deobfuscated) = deobfuscate_odttf(data, file_name) {
2113                    fonts.push(rdocx_layout::FontFile {
2114                        family,
2115                        data: deobfuscated,
2116                    });
2117                }
2118            } else if lower.ends_with(".ttf") || lower.ends_with(".otf") || lower.ends_with(".ttc")
2119            {
2120                fonts.push(rdocx_layout::FontFile {
2121                    family,
2122                    data: data.clone(),
2123                });
2124            }
2125        }
2126
2127        fonts
2128    }
2129
2130    /// Load font files from a directory and return them as FontFile entries.
2131    ///
2132    /// This is useful for CLI tools that accept a `--font-dir` argument.
2133    /// Supports `.ttf`, `.otf`, and `.ttc` files.
2134    pub fn load_fonts_from_dir<P: AsRef<Path>>(dir: P) -> Vec<rdocx_layout::FontFile> {
2135        let mut fonts = Vec::new();
2136        let dir = dir.as_ref();
2137        if let Ok(entries) = std::fs::read_dir(dir) {
2138            for entry in entries.flatten() {
2139                let path = entry.path();
2140                let ext = path
2141                    .extension()
2142                    .and_then(|e| e.to_str())
2143                    .unwrap_or("")
2144                    .to_lowercase();
2145                if (ext == "ttf" || ext == "otf" || ext == "ttc")
2146                    && let Ok(data) = std::fs::read(&path)
2147                {
2148                    let family = path
2149                        .file_stem()
2150                        .and_then(|s| s.to_str())
2151                        .unwrap_or("Unknown")
2152                        .to_string();
2153                    fonts.push(rdocx_layout::FontFile { family, data });
2154                }
2155            }
2156        }
2157        fonts
2158    }
2159
2160    /// Save a header/footer part back to the OPC package.
2161    fn save_header_footer(&mut self, rel_id: &str, hf: &CT_HdrFtr, is_header: bool) {
2162        let part_name = {
2163            let rels = self.package.get_part_rels(&self.doc_part_name);
2164            rels.and_then(|r| r.get_by_id(rel_id))
2165                .map(|rel| OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target))
2166        };
2167        if let Some(part_name) = part_name {
2168            let xml = if is_header {
2169                hf.to_xml_header()
2170            } else {
2171                hf.to_xml_footer()
2172            };
2173            if let Ok(xml) = xml {
2174                self.package.set_part(&part_name, xml);
2175            }
2176        }
2177    }
2178
2179    // ---- Document Intelligence API ----
2180
2181    /// Get all headings in the document as (level, text) pairs.
2182    ///
2183    /// Detects heading paragraphs by their style ID (e.g. "Heading1", "Heading2").
2184    pub fn headings(&self) -> Vec<(u32, String)> {
2185        let mut result = Vec::new();
2186        for content in &self.document.body.content {
2187            if let BodyContent::Paragraph(p) = content
2188                && let Some(level) = Self::detect_heading_level_for_toc(p)
2189            {
2190                result.push((level, p.text()));
2191            }
2192        }
2193        result
2194    }
2195
2196    /// Get a hierarchical outline of the document headings.
2197    ///
2198    /// Returns a tree structure where each node contains the heading level,
2199    /// text, and children (sub-headings).
2200    pub fn document_outline(&self) -> Vec<OutlineNode> {
2201        let headings = self.headings();
2202        build_outline_tree(&headings)
2203    }
2204
2205    /// Get information about all images in the document.
2206    ///
2207    /// Returns metadata for each inline and anchored image found in body paragraphs.
2208    pub fn images(&self) -> Vec<ImageInfo> {
2209        let mut result = Vec::new();
2210
2211        for content in &self.document.body.content {
2212            Self::collect_images_from_content(content, &mut result);
2213        }
2214        result
2215    }
2216
2217    fn collect_images_from_content(content: &BodyContent, result: &mut Vec<ImageInfo>) {
2218        match content {
2219            BodyContent::Paragraph(p) => {
2220                for run in &p.runs {
2221                    for rc in &run.content {
2222                        if let RunContent::Drawing(drawing) = rc {
2223                            if let Some(inline) = &drawing.inline {
2224                                result.push(ImageInfo {
2225                                    embed_id: inline.embed_id.clone(),
2226                                    name: inline.name.clone(),
2227                                    description: inline.description.clone(),
2228                                    width_emu: inline.extent_cx.0,
2229                                    height_emu: inline.extent_cy.0,
2230                                    is_anchor: false,
2231                                });
2232                            }
2233                            if let Some(anchor) = &drawing.anchor {
2234                                result.push(ImageInfo {
2235                                    embed_id: anchor.embed_id.clone(),
2236                                    name: anchor.name.clone(),
2237                                    description: anchor.description.clone(),
2238                                    width_emu: anchor.extent_cx.0,
2239                                    height_emu: anchor.extent_cy.0,
2240                                    is_anchor: true,
2241                                });
2242                            }
2243                        }
2244                    }
2245                }
2246            }
2247            BodyContent::Table(tbl) => {
2248                for row in &tbl.rows {
2249                    for cell in &row.cells {
2250                        for cc in &cell.content {
2251                            match cc {
2252                                rdocx_oxml::table::CellContent::Paragraph(p) => {
2253                                    Self::collect_images_from_content(
2254                                        &BodyContent::Paragraph(p.clone()),
2255                                        result,
2256                                    );
2257                                }
2258                                rdocx_oxml::table::CellContent::Table(nested) => {
2259                                    Self::collect_images_from_content(
2260                                        &BodyContent::Table(nested.clone()),
2261                                        result,
2262                                    );
2263                                }
2264                            }
2265                        }
2266                    }
2267                }
2268            }
2269            BodyContent::RawXml(_) => {}
2270        }
2271    }
2272
2273    /// Get information about all hyperlinks in the document.
2274    ///
2275    /// Resolves hyperlink relationship IDs to their target URLs where possible.
2276    pub fn links(&self) -> Vec<LinkInfo> {
2277        use rdocx_opc::relationship::rel_types;
2278
2279        // Build a map of hyperlink rel_id -> target URL
2280        let mut url_map = std::collections::HashMap::new();
2281        if let Some(rels) = self.package.get_part_rels(&self.doc_part_name) {
2282            for rel in &rels.items {
2283                if rel.rel_type == rel_types::HYPERLINK
2284                    && rel.target_mode.as_ref().is_some_and(|m| m == "External")
2285                {
2286                    url_map.insert(rel.id.clone(), rel.target.clone());
2287                }
2288            }
2289        }
2290
2291        let mut result = Vec::new();
2292        for content in &self.document.body.content {
2293            if let BodyContent::Paragraph(p) = content {
2294                for hl in &p.hyperlinks {
2295                    let text: String = p.runs[hl.run_start..hl.run_end]
2296                        .iter()
2297                        .map(|r| r.text())
2298                        .collect::<Vec<_>>()
2299                        .join("");
2300
2301                    let url = hl.rel_id.as_ref().and_then(|id| url_map.get(id)).cloned();
2302
2303                    result.push(LinkInfo {
2304                        text,
2305                        url,
2306                        anchor: hl.anchor.clone(),
2307                        rel_id: hl.rel_id.clone(),
2308                    });
2309                }
2310            }
2311        }
2312        result
2313    }
2314
2315    /// Count the number of words in the document.
2316    ///
2317    /// Counts whitespace-separated tokens across all paragraphs (including
2318    /// paragraphs inside table cells).
2319    pub fn word_count(&self) -> usize {
2320        let mut count = 0;
2321        for content in &self.document.body.content {
2322            count += Self::word_count_in_content(content);
2323        }
2324        count
2325    }
2326
2327    fn word_count_in_content(content: &BodyContent) -> usize {
2328        match content {
2329            BodyContent::Paragraph(p) => p.text().split_whitespace().count(),
2330            BodyContent::Table(tbl) => {
2331                let mut count = 0;
2332                for row in &tbl.rows {
2333                    for cell in &row.cells {
2334                        for cc in &cell.content {
2335                            match cc {
2336                                rdocx_oxml::table::CellContent::Paragraph(p) => {
2337                                    count += p.text().split_whitespace().count();
2338                                }
2339                                rdocx_oxml::table::CellContent::Table(nested) => {
2340                                    count += Self::word_count_in_content(&BodyContent::Table(
2341                                        nested.clone(),
2342                                    ));
2343                                }
2344                            }
2345                        }
2346                    }
2347                }
2348                count
2349            }
2350            BodyContent::RawXml(_) => 0,
2351        }
2352    }
2353
2354    /// Audit the document for accessibility issues.
2355    ///
2356    /// Checks for common problems: missing image alt text, heading level gaps,
2357    /// empty paragraphs, missing document metadata.
2358    pub fn audit_accessibility(&self) -> Vec<AccessibilityIssue> {
2359        let mut issues = Vec::new();
2360
2361        // Check: missing document title
2362        if self.title().is_none() {
2363            issues.push(AccessibilityIssue {
2364                severity: IssueSeverity::Warning,
2365                message: "Document has no title".to_string(),
2366            });
2367        }
2368
2369        // Check: missing document language (author as a proxy for basic metadata)
2370        if self.author().is_none() {
2371            issues.push(AccessibilityIssue {
2372                severity: IssueSeverity::Info,
2373                message: "Document has no author".to_string(),
2374            });
2375        }
2376
2377        // Check: images without alt text
2378        let images = self.images();
2379        for img in &images {
2380            let has_alt = img
2381                .description
2382                .as_ref()
2383                .is_some_and(|d| !d.is_empty() && d != "Background");
2384            if !has_alt {
2385                let name = img
2386                    .name
2387                    .as_deref()
2388                    .or(Some(&img.embed_id))
2389                    .unwrap_or("unknown");
2390                issues.push(AccessibilityIssue {
2391                    severity: IssueSeverity::Error,
2392                    message: format!("Image \"{name}\" has no alt text"),
2393                });
2394            }
2395        }
2396
2397        // Check: heading level gaps
2398        let headings = self.headings();
2399        let mut prev_level: Option<u32> = None;
2400        for (level, text) in &headings {
2401            if let Some(prev) = prev_level
2402                && *level > prev + 1
2403            {
2404                issues.push(AccessibilityIssue {
2405                    severity: IssueSeverity::Warning,
2406                    message: format!(
2407                        "Heading level gap: h{prev} -> h{level} (\"{}\")",
2408                        truncate_str(text, 40)
2409                    ),
2410                });
2411            }
2412            prev_level = Some(*level);
2413        }
2414
2415        // Check: excessive empty paragraphs
2416        let mut consecutive_empty = 0u32;
2417        for content in &self.document.body.content {
2418            if let BodyContent::Paragraph(p) = content {
2419                if p.text().trim().is_empty() {
2420                    consecutive_empty += 1;
2421                    if consecutive_empty >= 3 {
2422                        issues.push(AccessibilityIssue {
2423                            severity: IssueSeverity::Info,
2424                            message: format!(
2425                                "{consecutive_empty} consecutive empty paragraphs (consider using spacing instead)"
2426                            ),
2427                        });
2428                    }
2429                } else {
2430                    consecutive_empty = 0;
2431                }
2432            } else {
2433                consecutive_empty = 0;
2434            }
2435        }
2436
2437        issues
2438    }
2439}
2440
2441impl Default for Document {
2442    fn default() -> Self {
2443        Self::new()
2444    }
2445}
2446
/// Guess an image's MIME content type from the extension of its part name.
///
/// Only the final `/`-separated segment of `part_name` is inspected and the
/// extension is compared case-insensitively. A name without an extension,
/// or with an unrecognized one, falls back to `image/png`.
fn guess_image_content_type(part_name: &str) -> String {
    // Restrict the search to the file name so that a name without any dot is
    // treated as "no extension" instead of being matched as a whole.
    let file_name = part_name.rsplit('/').next().unwrap_or(part_name);
    let ext = file_name
        .rsplit_once('.')
        .map(|(_, ext)| ext.to_ascii_lowercase())
        .unwrap_or_default();

    let content_type = match ext.as_str() {
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "bmp" => "image/bmp",
        "tiff" | "tif" => "image/tiff",
        // Vector / metafile formats must not be labelled as PNG.
        "svg" => "image/svg+xml",
        "emf" => "image/x-emf",
        "wmf" => "image/x-wmf",
        _ => "image/png",
    };
    content_type.to_string()
}
2460
/// A node in the document outline tree.
///
/// Nodes compare by value (level, text and the full subtree of children)
/// and can be used as hash keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct OutlineNode {
    /// The heading level (1-9).
    pub level: u32,
    /// The heading text.
    pub text: String,
    /// Child headings (sub-headings), in document order.
    pub children: Vec<OutlineNode>,
}
2471
/// Information about an image in the document.
///
/// Values compare field-by-field and can be used as hash keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ImageInfo {
    /// The relationship ID for the embedded image.
    pub embed_id: String,
    /// Optional name attribute.
    pub name: Option<String>,
    /// Optional description (alt text).
    pub description: Option<String>,
    /// Width in EMUs (English Metric Units, 914400 EMU = 1 inch).
    pub width_emu: i64,
    /// Height in EMUs.
    pub height_emu: i64,
    /// Whether this is an anchored (floating) image vs inline.
    pub is_anchor: bool,
}
2488
/// Information about a hyperlink in the document.
///
/// Values compare field-by-field and can be used as hash keys, which makes
/// it easy to de-duplicate links.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct LinkInfo {
    /// The display text of the hyperlink.
    pub text: String,
    /// The resolved target URL (if external).
    pub url: Option<String>,
    /// Internal document anchor (if any).
    pub anchor: Option<String>,
    /// The relationship ID.
    pub rel_id: Option<String>,
}
2501
/// Severity level for accessibility issues.
///
/// Variants are declared from least to most severe, so the derived ordering
/// satisfies `Info < Warning < Error` and issues can be sorted or filtered
/// by severity.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum IssueSeverity {
    /// Informational suggestion.
    Info,
    /// Potential problem.
    Warning,
    /// Definite accessibility barrier.
    Error,
}
2512
2513/// An accessibility issue found during audit.
2514#[derive(Debug, Clone, PartialEq)]
2515pub struct AccessibilityIssue {
2516    /// How severe the issue is.
2517    pub severity: IssueSeverity,
2518    /// Human-readable description of the issue.
2519    pub message: String,
2520}
2521
2522/// Build a hierarchical outline tree from a flat list of (level, text) headings.
2523fn build_outline_tree(headings: &[(u32, String)]) -> Vec<OutlineNode> {
2524    let mut root: Vec<OutlineNode> = Vec::new();
2525    let mut stack: Vec<(u32, usize)> = Vec::new(); // (level, index in parent's children)
2526
2527    for (level, text) in headings {
2528        let node = OutlineNode {
2529            level: *level,
2530            text: text.clone(),
2531            children: Vec::new(),
2532        };
2533
2534        // Pop stack until we find a parent with a lower level
2535        while let Some(&(stack_level, _)) = stack.last() {
2536            if stack_level >= *level {
2537                stack.pop();
2538            } else {
2539                break;
2540            }
2541        }
2542
2543        if stack.is_empty() {
2544            root.push(node);
2545            let idx = root.len() - 1;
2546            stack.push((*level, idx));
2547        } else {
2548            // Navigate to the correct parent in the tree
2549            let target = get_outline_parent_mut(&mut root, &stack);
2550            target.children.push(node);
2551            let idx = target.children.len() - 1;
2552            stack.push((*level, idx));
2553        }
2554    }
2555
2556    root
2557}
2558
2559/// Navigate to the parent node indicated by the stack.
2560fn get_outline_parent_mut<'a>(
2561    root: &'a mut [OutlineNode],
2562    stack: &[(u32, usize)],
2563) -> &'a mut OutlineNode {
2564    let mut current = &mut root[stack[0].1];
2565    for &(_, idx) in &stack[1..] {
2566        current = &mut current.children[idx];
2567    }
2568    current
2569}
2570
/// Truncate a string to at most `max_len` characters, appending "..." if
/// anything was cut off.
///
/// Lengths are measured in Unicode scalar values (`char`s), not bytes, so a
/// string made of multi-byte characters is only truncated when it really has
/// more than `max_len` characters. A truncated result keeps the first
/// `max_len - 3` characters followed by "..."; when `max_len` is smaller
/// than 3 the truncated result is just "...".
fn truncate_str(s: &str, max_len: usize) -> String {
    // The byte length is an upper bound on the char count, so the cheap
    // comparison settles most cases without walking the string.
    if s.len() <= max_len || s.chars().count() <= max_len {
        return s.to_string();
    }

    let keep = max_len.saturating_sub(3);
    let mut truncated: String = s.chars().take(keep).collect();
    truncated.push_str("...");
    truncated
}
2580
/// Deobfuscate an ODTTF (obfuscated TrueType/OpenType) font file.
///
/// In an `.odttf` file the first 32 bytes of the font are XOR'd with a
/// 16-byte key derived from a GUID; the remaining bytes are stored as-is.
/// This function expects the GUID to be the stem of `file_name`, for example
/// `00112233-4455-6677-8899-AABBCCDDEEFF.odttf`. Braces and hyphens are
/// optional, and a leading directory path is ignored.
///
/// The key is the 16 GUID bytes, read in the order they are written in the
/// name, in reverse order; it is applied once to bytes 0-15 and once to
/// bytes 16-31. Because XOR is its own inverse, the same routine also
/// re-obfuscates a plain font.
///
/// Returns `None` when `data` is shorter than 32 bytes or when the file name
/// stem does not contain exactly 32 hexadecimal digits.
fn deobfuscate_odttf(data: &[u8], file_name: &str) -> Option<Vec<u8>> {
    const OBFUSCATED_LEN: usize = 32;
    const GUID_LEN: usize = 16;

    if data.len() < OBFUSCATED_LEN {
        return None;
    }

    // Only the file name stem carries the GUID. Dropping the directory part
    // first keeps hex-looking letters in folder names (the "d" in "word",
    // the "f" in "fonts") out of the digit scan below.
    let base_name = file_name.rsplit('/').next().unwrap_or(file_name);
    let stem = base_name.split('.').next().unwrap_or("");

    // Keeping only hex digits removes braces and hyphens in one pass.
    let hex: String = stem.chars().filter(char::is_ascii_hexdigit).collect();
    if hex.len() != GUID_LEN * 2 {
        return None;
    }

    let mut key = [0u8; GUID_LEN];
    for (i, byte) in key.iter_mut().enumerate() {
        // `hex` is pure ASCII, so slicing on byte offsets is safe.
        *byte = u8::from_str_radix(&hex[i * 2..i * 2 + 2], 16).ok()?;
    }
    // The XOR key is the GUID byte sequence in reverse order.
    key.reverse();

    let mut result = data.to_vec();
    for (byte, key_byte) in result
        .iter_mut()
        .take(OBFUSCATED_LEN)
        .zip(key.iter().copied().cycle())
    {
        *byte ^= key_byte;
    }

    Some(result)
}
2626
2627#[cfg(test)]
2628mod tests {
2629    use super::*;
2630    use crate::paragraph::Alignment;
2631    use rdocx_oxml::units::{HalfPoint, Twips};
2632
2633    #[test]
2634    fn create_new_document() {
2635        let doc = Document::new();
2636        assert_eq!(doc.paragraph_count(), 0);
2637        assert!(doc.section_properties().is_some());
2638    }
2639
2640    #[test]
2641    fn add_paragraphs() {
2642        let mut doc = Document::new();
2643        doc.add_paragraph("First paragraph");
2644        doc.add_paragraph("Second paragraph");
2645        assert_eq!(doc.paragraph_count(), 2);
2646
2647        let paras = doc.paragraphs();
2648        assert_eq!(paras[0].text(), "First paragraph");
2649        assert_eq!(paras[1].text(), "Second paragraph");
2650    }
2651
2652    #[test]
2653    fn paragraph_formatting() {
2654        let mut doc = Document::new();
2655        doc.add_paragraph("Centered").alignment(Alignment::Center);
2656
2657        let paras = doc.paragraphs();
2658        assert_eq!(paras[0].alignment(), Some(Alignment::Center));
2659    }
2660
2661    #[test]
2662    fn run_formatting() {
2663        let mut doc = Document::new();
2664        let mut para = doc.add_paragraph("");
2665        para.add_run("Bold text").bold(true).size(14.0);
2666
2667        let paras = doc.paragraphs();
2668        let runs: Vec<_> = paras[0].runs().collect();
2669        assert!(runs[0].is_bold());
2670        assert_eq!(runs[0].size(), Some(14.0));
2671    }
2672
2673    #[test]
2674    fn round_trip_in_memory() {
2675        let mut doc = Document::new();
2676        doc.add_paragraph("Hello, World!");
2677        doc.add_paragraph("Second paragraph")
2678            .alignment(Alignment::Center);
2679
2680        let bytes = doc.to_bytes().unwrap();
2681        let doc2 = Document::from_bytes(&bytes).unwrap();
2682
2683        assert_eq!(doc2.paragraph_count(), 2);
2684        let paras = doc2.paragraphs();
2685        assert_eq!(paras[0].text(), "Hello, World!");
2686        assert_eq!(paras[1].text(), "Second paragraph");
2687        assert_eq!(paras[1].alignment(), Some(Alignment::Center));
2688    }
2689
2690    #[test]
2691    fn styles_present() {
2692        let doc = Document::new();
2693        assert!(doc.style("Normal").is_some());
2694        assert!(doc.style("Heading1").is_some());
2695    }
2696
2697    #[test]
2698    fn paragraph_with_style() {
2699        let mut doc = Document::new();
2700        doc.add_paragraph("Title").style("Heading1");
2701
2702        let paras = doc.paragraphs();
2703        assert_eq!(paras[0].style_id(), Some("Heading1"));
2704    }
2705
2706    #[test]
2707    fn multiple_runs_in_paragraph() {
2708        let mut doc = Document::new();
2709        let mut para = doc.add_paragraph("");
2710        para.add_run("Normal ");
2711        para.add_run("bold ").bold(true);
2712        para.add_run("italic").italic(true);
2713
2714        let paras = doc.paragraphs();
2715        assert_eq!(paras[0].text(), "Normal bold italic");
2716        let runs: Vec<_> = paras[0].runs().collect();
2717        assert_eq!(runs.len(), 3);
2718        assert!(!runs[0].is_bold());
2719        assert!(runs[1].is_bold());
2720        assert!(runs[2].is_italic());
2721    }
2722
2723    #[test]
2724    fn add_custom_style() {
2725        let mut doc = Document::new();
2726        doc.add_style(StyleBuilder::paragraph("MyCustom", "My Custom Style").based_on("Normal"));
2727        assert!(doc.style("MyCustom").is_some());
2728        let s = doc.style("MyCustom").unwrap();
2729        assert_eq!(s.name(), Some("My Custom Style"));
2730        assert_eq!(s.based_on(), Some("Normal"));
2731    }
2732
2733    #[test]
2734    fn resolve_style_properties() {
2735        let doc = Document::new();
2736        // Heading1 should inherit from docDefaults and have its own overrides
2737        let ppr = doc.resolve_paragraph_properties(Some("Heading1"));
2738        assert_eq!(ppr.keep_next, Some(true));
2739        assert_eq!(ppr.space_before, Some(Twips(240)));
2740
2741        // Default (None) should apply Normal style
2742        let ppr = doc.resolve_paragraph_properties(None);
2743        assert_eq!(ppr.space_after, Some(Twips(160)));
2744    }
2745
2746    #[test]
2747    fn resolve_run_style_properties() {
2748        let doc = Document::new();
2749        let rpr = doc.resolve_run_properties(Some("Heading1"), None);
2750        assert_eq!(rpr.bold, Some(true));
2751        assert_eq!(rpr.sz, Some(HalfPoint(32)));
2752        assert_eq!(rpr.font_ascii, Some("Calibri".to_string()));
2753    }
2754
2755    #[test]
2756    fn set_landscape() {
2757        let mut doc = Document::new();
2758        doc.set_landscape();
2759        let sect = doc.section_properties().unwrap();
2760        assert_eq!(sect.orientation, Some(ST_PageOrientation::Landscape));
2761        // Width should be > height in landscape
2762        assert!(sect.page_width.unwrap().0 > sect.page_height.unwrap().0);
2763    }
2764
2765    #[test]
2766    fn set_margins() {
2767        let mut doc = Document::new();
2768        doc.set_margins(
2769            Length::inches(0.5),
2770            Length::inches(0.75),
2771            Length::inches(0.5),
2772            Length::inches(0.75),
2773        );
2774        let sect = doc.section_properties().unwrap();
2775        assert_eq!(sect.margin_top, Some(Twips(720)));
2776        assert_eq!(sect.margin_right, Some(Twips(1080)));
2777    }
2778
2779    #[test]
2780    fn set_columns() {
2781        let mut doc = Document::new();
2782        doc.set_columns(2, Length::inches(0.5));
2783        let sect = doc.section_properties().unwrap();
2784        let cols = sect.columns.as_ref().unwrap();
2785        assert_eq!(cols.num, Some(2));
2786        assert_eq!(cols.space, Some(Twips(720)));
2787        assert_eq!(cols.equal_width, Some(true));
2788    }
2789
2790    #[test]
2791    fn set_page_size() {
2792        let mut doc = Document::new();
2793        doc.set_page_size(Length::cm(21.0), Length::cm(29.7));
2794        let sect = doc.section_properties().unwrap();
2795        // A4: ~11906tw x ~16838tw
2796        let w = sect.page_width.unwrap().0;
2797        let h = sect.page_height.unwrap().0;
2798        assert!((w - 11906).abs() < 5);
2799        assert!((h - 16838).abs() < 5);
2800    }
2801
2802    #[test]
2803    fn set_different_first_page() {
2804        let mut doc = Document::new();
2805        doc.set_different_first_page(true);
2806        assert_eq!(doc.section_properties().unwrap().title_pg, Some(true));
2807    }
2808
2809    #[test]
2810    fn content_insertion_api() {
2811        let mut doc = Document::new();
2812        doc.add_paragraph("First");
2813        doc.add_paragraph("Third");
2814
2815        // Insert in middle
2816        doc.insert_paragraph(1, "Second");
2817        assert_eq!(doc.content_count(), 3);
2818        let paras = doc.paragraphs();
2819        assert_eq!(paras[0].text(), "First");
2820        assert_eq!(paras[1].text(), "Second");
2821        assert_eq!(paras[2].text(), "Third");
2822
2823        // Insert at beginning
2824        doc.insert_paragraph(0, "Zeroth");
2825        assert_eq!(doc.content_count(), 4);
2826        assert_eq!(doc.paragraphs()[0].text(), "Zeroth");
2827    }
2828
2829    #[test]
2830    fn find_content_index_and_remove() {
2831        let mut doc = Document::new();
2832        doc.add_paragraph("Hello");
2833        doc.add_paragraph("{{PLACEHOLDER}}");
2834        doc.add_paragraph("World");
2835
2836        assert_eq!(doc.find_content_index("{{PLACEHOLDER}}"), Some(1));
2837        assert_eq!(doc.find_content_index("NONEXISTENT"), None);
2838
2839        assert!(doc.remove_content(1));
2840        assert_eq!(doc.content_count(), 2);
2841        assert_eq!(doc.paragraphs()[1].text(), "World");
2842
2843        // Out of bounds
2844        assert!(!doc.remove_content(10));
2845    }
2846
2847    #[test]
2848    fn insert_table_at_index() {
2849        let mut doc = Document::new();
2850        doc.add_paragraph("Before");
2851        doc.add_paragraph("After");
2852
2853        doc.insert_table(1, 2, 3);
2854        assert_eq!(doc.content_count(), 3);
2855        assert_eq!(doc.table_count(), 1);
2856        // Paragraphs are still in correct order
2857        let paras = doc.paragraphs();
2858        assert_eq!(paras[0].text(), "Before");
2859        assert_eq!(paras[1].text(), "After");
2860    }
2861
2862    #[test]
2863    fn replace_text_in_body() {
2864        let mut doc = Document::new();
2865        doc.add_paragraph("Hello {{name}}!");
2866        doc.add_paragraph("Welcome to {{company}}.");
2867
2868        let count = doc.replace_text("{{name}}", "Alice");
2869        assert_eq!(count, 1);
2870        assert_eq!(doc.paragraphs()[0].text(), "Hello Alice!");
2871
2872        let count = doc.replace_text("{{company}}", "Acme");
2873        assert_eq!(count, 1);
2874        assert_eq!(doc.paragraphs()[1].text(), "Welcome to Acme.");
2875    }
2876
2877    #[test]
2878    fn replace_text_in_header_and_footer() {
2879        let mut doc = Document::new();
2880        doc.set_header("Header: {{title}}");
2881        doc.set_footer("Footer: {{title}}");
2882        doc.add_paragraph("Body: {{title}}");
2883
2884        let count = doc.replace_text("{{title}}", "My Doc");
2885        assert_eq!(count, 3);
2886
2887        assert_eq!(doc.paragraphs()[0].text(), "Body: My Doc");
2888        assert_eq!(doc.header_text().unwrap(), "Header: My Doc");
2889        assert_eq!(doc.footer_text().unwrap(), "Footer: My Doc");
2890    }
2891
2892    #[test]
2893    fn replace_all_batch() {
2894        let mut doc = Document::new();
2895        doc.add_paragraph("{{a}} and {{b}}");
2896
2897        let mut map = std::collections::HashMap::new();
2898        map.insert("{{a}}", "X");
2899        map.insert("{{b}}", "Y");
2900        let count = doc.replace_all(&map);
2901        assert_eq!(count, 2);
2902        assert_eq!(doc.paragraphs()[0].text(), "X and Y");
2903    }
2904
2905    #[test]
2906    fn template_workflow_round_trip() {
2907        let mut doc = Document::new();
2908        doc.add_paragraph("Company: {{company}}");
2909        doc.add_paragraph("Date: {{date}}");
2910
2911        doc.replace_text("{{company}}", "Acme Corp");
2912        doc.replace_text("{{date}}", "2026-02-22");
2913
2914        // Round-trip
2915        let bytes = doc.to_bytes().unwrap();
2916        let doc2 = Document::from_bytes(&bytes).unwrap();
2917        assert_eq!(doc2.paragraphs()[0].text(), "Company: Acme Corp");
2918        assert_eq!(doc2.paragraphs()[1].text(), "Date: 2026-02-22");
2919    }
2920
2921    #[test]
2922    fn add_background_image_round_trip() {
2923        // Create a minimal 1x1 PNG
2924        let png_data: Vec<u8> = vec![
2925            0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, // PNG signature
2926            0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, // IHDR chunk
2927            0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, // 1x1
2928            0x08, 0x02, 0x00, 0x00, 0x00, 0x90, 0x77, 0x53, 0xde, 0x00, 0x00, 0x00, 0x0c, 0x49,
2929            0x44, 0x41, 0x54, // IDAT chunk
2930            0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0xe2, 0x21,
2931            0xbc, 0x33, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, // IEND chunk
2932            0xae, 0x42, 0x60, 0x82,
2933        ];
2934
2935        let mut doc = Document::new();
2936        doc.add_paragraph("Hello World");
2937        doc.add_background_image(&png_data, "bg.png");
2938
2939        // Background image paragraph should be at index 0
2940        assert_eq!(doc.content_count(), 2);
2941
2942        // Round-trip
2943        let bytes = doc.to_bytes().unwrap();
2944        let doc2 = Document::from_bytes(&bytes).unwrap();
2945
2946        // Should still have 2 content items
2947        assert_eq!(doc2.content_count(), 2);
2948        // The second paragraph should have our text
2949        assert_eq!(doc2.paragraphs().last().unwrap().text(), "Hello World");
2950    }
2951
2952    #[test]
2953    fn add_anchored_image() {
2954        let png_data: Vec<u8> = vec![
2955            0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48,
2956            0x44, 0x52, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x02, 0x00, 0x00,
2957            0x00, 0x90, 0x77, 0x53, 0xde, 0x00, 0x00, 0x00, 0x0c, 0x49, 0x44, 0x41, 0x54, 0x08,
2958            0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0xe2, 0x21, 0xbc,
2959            0x33, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82,
2960        ];
2961
2962        let mut doc = Document::new();
2963        doc.add_paragraph("Content");
2964        doc.add_anchored_image(
2965            &png_data,
2966            "overlay.png",
2967            Length::inches(4.0),
2968            Length::inches(3.0),
2969            false,
2970        );
2971        assert_eq!(doc.content_count(), 2);
2972    }
2973
2974    #[test]
2975    fn insert_toc_basic() {
2976        let mut doc = Document::new();
2977        doc.add_paragraph("Introduction");
2978        doc.add_paragraph("Chapter 1").style("Heading1");
2979        doc.add_paragraph("Some text in chapter 1.");
2980        doc.add_paragraph("Section 1.1").style("Heading2");
2981        doc.add_paragraph("Text in section 1.1.");
2982        doc.add_paragraph("Chapter 2").style("Heading1");
2983        doc.add_paragraph("Text in chapter 2.");
2984
2985        // Before TOC: 7 content elements
2986        assert_eq!(doc.content_count(), 7);
2987
2988        // Insert TOC at index 0 with max_level 2
2989        doc.insert_toc(0, 2);
2990
2991        // TOC adds: 1 title + 3 heading entries (Ch1, Sec1.1, Ch2) = 4 paragraphs
2992        assert_eq!(doc.content_count(), 11);
2993
2994        // Verify TOC title
2995        let paras = doc.paragraphs();
2996        assert_eq!(paras[0].text(), "Table of Contents");
2997
2998        // Verify TOC entries contain heading text
2999        assert_eq!(paras[1].text(), "Chapter 1\t");
3000        assert_eq!(paras[2].text(), "Section 1.1\t");
3001        assert_eq!(paras[3].text(), "Chapter 2\t");
3002
3003        // Verify round-trip: save and re-open
3004        let bytes = doc.to_bytes().expect("should serialize");
3005        let doc2 = Document::from_bytes(&bytes).expect("should open");
3006        assert_eq!(doc2.content_count(), 11);
3007        let paras2 = doc2.paragraphs();
3008        assert_eq!(paras2[0].text(), "Table of Contents");
3009    }
3010
3011    #[test]
3012    fn append_documents() {
3013        let mut doc_a = Document::new();
3014        doc_a.add_paragraph("Paragraph A1");
3015        doc_a.add_paragraph("Paragraph A2");
3016
3017        let mut doc_b = Document::new();
3018        doc_b.add_paragraph("Paragraph B1");
3019        doc_b.add_paragraph("Paragraph B2");
3020        doc_b.add_paragraph("Paragraph B3");
3021
3022        assert_eq!(doc_a.content_count(), 2);
3023        doc_a.append(&doc_b);
3024        assert_eq!(doc_a.content_count(), 5);
3025
3026        let paras = doc_a.paragraphs();
3027        assert_eq!(paras[0].text(), "Paragraph A1");
3028        assert_eq!(paras[1].text(), "Paragraph A2");
3029        assert_eq!(paras[2].text(), "Paragraph B1");
3030        assert_eq!(paras[3].text(), "Paragraph B2");
3031        assert_eq!(paras[4].text(), "Paragraph B3");
3032
3033        // Verify round-trip
3034        let bytes = doc_a.to_bytes().expect("serialize");
3035        let reopened = Document::from_bytes(&bytes).expect("open");
3036        assert_eq!(reopened.content_count(), 5);
3037    }
3038
3039    #[test]
3040    fn append_with_section_break() {
3041        let mut doc_a = Document::new();
3042        doc_a.add_paragraph("A1");
3043
3044        let mut doc_b = Document::new();
3045        doc_b.add_paragraph("B1");
3046
3047        doc_a.append_with_break(&doc_b, crate::SectionBreak::Continuous);
3048        // 1 original + 1 section break paragraph + 1 merged = 3
3049        assert_eq!(doc_a.content_count(), 3);
3050    }
3051
3052    #[test]
3053    fn insert_document_at_index() {
3054        let mut doc_a = Document::new();
3055        doc_a.add_paragraph("First");
3056        doc_a.add_paragraph("Last");
3057
3058        let mut doc_b = Document::new();
3059        doc_b.add_paragraph("Middle 1");
3060        doc_b.add_paragraph("Middle 2");
3061
3062        doc_a.insert_document(1, &doc_b);
3063        assert_eq!(doc_a.content_count(), 4);
3064
3065        let paras = doc_a.paragraphs();
3066        assert_eq!(paras[0].text(), "First");
3067        assert_eq!(paras[1].text(), "Middle 1");
3068        assert_eq!(paras[2].text(), "Middle 2");
3069        assert_eq!(paras[3].text(), "Last");
3070    }
3071
3072    #[test]
3073    fn merge_deduplicates_styles() {
3074        let mut doc_a = Document::new();
3075        doc_a.add_paragraph("A").style("Heading1");
3076
3077        let mut doc_b = Document::new();
3078        doc_b.add_paragraph("B").style("Heading1");
3079        doc_b.add_style(
3080            crate::style::StyleBuilder::paragraph("CustomB", "Custom B").based_on("Normal"),
3081        );
3082        doc_b.add_paragraph("C").style("CustomB");
3083
3084        let styles_before = doc_a.styles.styles.len();
3085        doc_a.append(&doc_b);
3086        let styles_after = doc_a.styles.styles.len();
3087
3088        // Heading1 already existed, so only CustomB should be added
3089        assert_eq!(styles_after, styles_before + 1);
3090    }
3091
3092    #[test]
3093    fn headings_and_outline() {
3094        let mut doc = Document::new();
3095        doc.add_paragraph("Intro");
3096        doc.add_paragraph("Chapter 1").style("Heading1");
3097        doc.add_paragraph("Section 1.1").style("Heading2");
3098        doc.add_paragraph("Section 1.2").style("Heading2");
3099        doc.add_paragraph("Chapter 2").style("Heading1");
3100        doc.add_paragraph("Section 2.1").style("Heading2");
3101        doc.add_paragraph("Sub 2.1.1").style("Heading3");
3102
3103        let headings = doc.headings();
3104        assert_eq!(headings.len(), 6);
3105        assert_eq!(headings[0], (1, "Chapter 1".to_string()));
3106        assert_eq!(headings[1], (2, "Section 1.1".to_string()));
3107        assert_eq!(headings[5], (3, "Sub 2.1.1".to_string()));
3108
3109        let outline = doc.document_outline();
3110        assert_eq!(outline.len(), 2); // Two h1 nodes
3111        assert_eq!(outline[0].text, "Chapter 1");
3112        assert_eq!(outline[0].children.len(), 2); // 1.1 and 1.2
3113        assert_eq!(outline[1].text, "Chapter 2");
3114        assert_eq!(outline[1].children.len(), 1); // 2.1
3115        assert_eq!(outline[1].children[0].children.len(), 1); // 2.1.1
3116    }
3117
3118    #[test]
3119    fn word_count_basic() {
3120        let mut doc = Document::new();
3121        doc.add_paragraph("Hello world");
3122        doc.add_paragraph("Three more words");
3123        assert_eq!(doc.word_count(), 5);
3124    }
3125
3126    #[test]
3127    fn audit_accessibility_missing_metadata() {
3128        let doc = Document::new();
3129        let issues = doc.audit_accessibility();
3130        // New document has no title or author
3131        assert!(issues.iter().any(|i| i.message.contains("no title")));
3132        assert!(issues.iter().any(|i| i.message.contains("no author")));
3133    }
3134
3135    #[test]
3136    fn audit_heading_level_gap() {
3137        let mut doc = Document::new();
3138        doc.set_title("Test");
3139        doc.set_author("Test");
3140        doc.add_paragraph("Ch 1").style("Heading1");
3141        doc.add_paragraph("Skip to 3").style("Heading3");
3142
3143        let issues = doc.audit_accessibility();
3144        assert!(
3145            issues
3146                .iter()
3147                .any(|i| i.message.contains("Heading level gap"))
3148        );
3149    }
3150
3151    #[test]
3152    fn links_returns_empty_for_no_hyperlinks() {
3153        let mut doc = Document::new();
3154        doc.add_paragraph("No links here.");
3155        assert!(doc.links().is_empty());
3156    }
3157
3158    #[test]
3159    fn images_returns_empty_for_text_only() {
3160        let mut doc = Document::new();
3161        doc.add_paragraph("Just text.");
3162        assert!(doc.images().is_empty());
3163    }
3164}