1use std::path::Path;
4
5use rdocx_opc::OpcPackage;
6use rdocx_opc::relationship::rel_types;
7use rdocx_oxml::document::{BodyContent, CT_Columns, CT_Document, CT_SectPr};
8use rdocx_oxml::drawing::{CT_Anchor, CT_Drawing, CT_Inline};
9use rdocx_oxml::header_footer::{CT_HdrFtr, HdrFtrRef, HdrFtrType};
10use rdocx_oxml::numbering::CT_Numbering;
11use rdocx_oxml::properties::{CT_PPr, CT_RPr};
12use rdocx_oxml::shared::{ST_PageOrientation, ST_SectionType};
13use rdocx_oxml::styles::CT_Styles;
14use rdocx_oxml::table::CT_Tbl;
15use rdocx_oxml::text::{CT_P, CT_R, RunContent};
16
17use rdocx_oxml::core_properties::CoreProperties;
18
19use crate::Length;
20use crate::error::{Error, Result};
21use crate::paragraph::{Paragraph, ParagraphRef};
22use crate::style::{self, Style, StyleBuilder};
23use crate::table::{Table, TableRef};
24
/// An in-memory word-processing document.
///
/// Bundles the OPC package with the parsed parts this type edits. The parsed
/// parts are serialized back into the package by `flush_to_package` before the
/// package is written out.
pub struct Document {
    /// The OPC package holding the parts, relationships and content types.
    package: OpcPackage,
    /// Parsed main document part.
    document: CT_Document,
    /// Parsed style definitions.
    styles: CT_Styles,
    /// Parsed numbering definitions; `None` when no numbering part is loaded
    /// or created yet.
    numbering: Option<CT_Numbering>,
    /// Core properties (title, creator, ...); `None` when not loaded or set.
    core_properties: Option<CoreProperties>,
    /// Part name of the main document part, e.g. `/word/document.xml`.
    doc_part_name: String,
    /// Counter used to number `/word/media/imageN.ext` parts.
    image_counter: usize,
}
40
41impl Document {
42 pub fn new() -> Self {
44 let mut package = OpcPackage::new_docx();
45 let document = CT_Document::new();
46 let styles = CT_Styles::new_default();
47
48 package
50 .get_or_create_part_rels("/word/document.xml")
51 .add(rel_types::STYLES, "styles.xml");
52
53 Document {
54 package,
55 document,
56 styles,
57 numbering: None,
58 core_properties: None,
59 doc_part_name: "/word/document.xml".to_string(),
60 image_counter: 0,
61 }
62 }
63
64 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
66 let package = OpcPackage::open(path)?;
67 Self::from_package(package)
68 }
69
70 pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
72 let cursor = std::io::Cursor::new(bytes);
73 let package = OpcPackage::from_reader(cursor)?;
74 Self::from_package(package)
75 }
76
77 fn from_package(package: OpcPackage) -> Result<Self> {
78 let doc_part_name = package.main_document_part().ok_or(Error::NoDocumentPart)?;
79
80 let doc_xml = package
81 .get_part(&doc_part_name)
82 .ok_or(Error::NoDocumentPart)?;
83 let document = CT_Document::from_xml(doc_xml)?;
84
85 let styles = if let Some(rels) = package.get_part_rels(&doc_part_name) {
87 if let Some(styles_rel) = rels.get_by_type(rel_types::STYLES) {
88 let styles_part =
89 OpcPackage::resolve_rel_target(&doc_part_name, &styles_rel.target);
90 if let Some(styles_xml) = package.get_part(&styles_part) {
91 CT_Styles::from_xml(styles_xml)?
92 } else {
93 CT_Styles::new_default()
94 }
95 } else {
96 CT_Styles::new_default()
97 }
98 } else {
99 CT_Styles::new_default()
100 };
101
102 let numbering = if let Some(rels) = package.get_part_rels(&doc_part_name) {
104 if let Some(num_rel) = rels.get_by_type(rel_types::NUMBERING) {
105 let num_part = OpcPackage::resolve_rel_target(&doc_part_name, &num_rel.target);
106 if let Some(num_xml) = package.get_part(&num_part) {
107 Some(CT_Numbering::from_xml(num_xml)?)
108 } else {
109 None
110 }
111 } else {
112 None
113 }
114 } else {
115 None
116 };
117
118 let core_properties = package
120 .get_part("/docProps/core.xml")
121 .and_then(|xml| CoreProperties::from_xml(xml).ok());
122
123 let image_counter = package
124 .parts
125 .keys()
126 .filter(|k| k.starts_with("/word/media/image"))
127 .count();
128
129 Ok(Document {
130 package,
131 document,
132 styles,
133 numbering,
134 core_properties,
135 doc_part_name,
136 image_counter,
137 })
138 }
139
140 pub fn save<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
142 self.flush_to_package()?;
143 self.package.save(path)?;
144 Ok(())
145 }
146
147 pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
149 self.flush_to_package()?;
150 let mut buf = std::io::Cursor::new(Vec::new());
151 self.package.write_to(&mut buf)?;
152 Ok(buf.into_inner())
153 }
154
155 fn flush_to_package(&mut self) -> Result<()> {
157 let doc_xml = self.document.to_xml()?;
159 self.package.set_part(&self.doc_part_name, doc_xml);
160
161 let styles_xml = self.styles.to_xml()?;
163 self.package.set_part("/word/styles.xml", styles_xml);
164
165 if let Some(ref numbering) = self.numbering {
167 let numbering_xml = numbering.to_xml()?;
168 self.package.set_part("/word/numbering.xml", numbering_xml);
169 }
170
171 if let Some(ref props) = self.core_properties {
173 let core_xml = props.to_xml()?;
174 self.package.set_part("/docProps/core.xml", core_xml);
175 self.package.content_types.add_override(
176 "/docProps/core.xml",
177 "application/vnd.openxmlformats-package.core-properties+xml",
178 );
179 }
180
181 Ok(())
182 }
183
184 pub fn paragraphs(&self) -> Vec<ParagraphRef<'_>> {
188 self.document
189 .body
190 .paragraphs()
191 .map(|p| ParagraphRef { inner: p })
192 .collect()
193 }
194
195 pub fn add_paragraph(&mut self, text: &str) -> Paragraph<'_> {
197 let mut p = CT_P::new();
198 if !text.is_empty() {
199 p.add_run(text);
200 }
201 self.document.body.content.push(BodyContent::Paragraph(p));
202 match self.document.body.content.last_mut().unwrap() {
203 BodyContent::Paragraph(p) => Paragraph { inner: p },
204 _ => unreachable!(),
205 }
206 }
207
208 pub fn paragraph_count(&self) -> usize {
210 self.document.body.paragraphs().count()
211 }
212
213 pub fn paragraph_mut(&mut self, index: usize) -> Option<Paragraph<'_>> {
215 self.document
216 .body
217 .paragraphs_mut()
218 .nth(index)
219 .map(|p| Paragraph { inner: p })
220 }
221
222 pub fn tables(&self) -> Vec<TableRef<'_>> {
226 self.document
227 .body
228 .tables()
229 .map(|t| TableRef { inner: t })
230 .collect()
231 }
232
233 pub fn add_table(&mut self, rows: usize, cols: usize) -> Table<'_> {
236 use rdocx_oxml::table::{CT_Row, CT_TblGrid, CT_TblGridCol, CT_TblPr, CT_TblWidth, CT_Tc};
237 use rdocx_oxml::units::Twips;
238
239 let col_width = Twips(9360 / cols as i32);
241
242 let grid = CT_TblGrid {
243 columns: (0..cols)
244 .map(|_| CT_TblGridCol { width: col_width })
245 .collect(),
246 };
247
248 let mut tbl = CT_Tbl::new();
249 tbl.properties = Some(CT_TblPr {
250 width: Some(CT_TblWidth::dxa(col_width.0 * cols as i32)),
251 ..Default::default()
252 });
253 tbl.grid = Some(grid);
254
255 for _ in 0..rows {
256 let mut row = CT_Row::new();
257 for _ in 0..cols {
258 row.cells.push(CT_Tc::new());
259 }
260 tbl.rows.push(row);
261 }
262
263 self.document.body.content.push(BodyContent::Table(tbl));
264 match self.document.body.content.last_mut().unwrap() {
265 BodyContent::Table(t) => Table { inner: t },
266 _ => unreachable!(),
267 }
268 }
269
270 pub fn table_count(&self) -> usize {
272 self.document.body.tables().count()
273 }
274
275 pub fn content_count(&self) -> usize {
279 self.document.body.content_count()
280 }
281
282 pub fn insert_paragraph(&mut self, index: usize, text: &str) -> Paragraph<'_> {
287 let mut p = CT_P::new();
288 if !text.is_empty() {
289 p.add_run(text);
290 }
291 self.document.body.insert_paragraph(index, p);
292 match &mut self.document.body.content[index] {
293 BodyContent::Paragraph(p) => Paragraph { inner: p },
294 _ => unreachable!(),
295 }
296 }
297
298 pub fn insert_table(&mut self, index: usize, rows: usize, cols: usize) -> Table<'_> {
303 use rdocx_oxml::table::{CT_Row, CT_TblGrid, CT_TblGridCol, CT_TblPr, CT_TblWidth, CT_Tc};
304 use rdocx_oxml::units::Twips;
305
306 let col_width = Twips(9360 / cols as i32);
307 let grid = CT_TblGrid {
308 columns: (0..cols)
309 .map(|_| CT_TblGridCol { width: col_width })
310 .collect(),
311 };
312
313 let mut tbl = CT_Tbl::new();
314 tbl.properties = Some(CT_TblPr {
315 width: Some(CT_TblWidth::dxa(col_width.0 * cols as i32)),
316 ..Default::default()
317 });
318 tbl.grid = Some(grid);
319
320 for _ in 0..rows {
321 let mut row = CT_Row::new();
322 for _ in 0..cols {
323 row.cells.push(CT_Tc::new());
324 }
325 tbl.rows.push(row);
326 }
327
328 self.document.body.insert_table(index, tbl);
329 match &mut self.document.body.content[index] {
330 BodyContent::Table(t) => Table { inner: t },
331 _ => unreachable!(),
332 }
333 }
334
335 pub fn find_content_index(&self, text: &str) -> Option<usize> {
337 self.document.body.find_paragraph_index(text)
338 }
339
340 pub fn remove_content(&mut self, index: usize) -> bool {
344 self.document.body.remove(index).is_some()
345 }
346
347 pub fn add_picture(
357 &mut self,
358 image_data: &[u8],
359 image_filename: &str,
360 width: Length,
361 height: Length,
362 ) -> Paragraph<'_> {
363 let rel_id = self.embed_image(image_data, image_filename);
364
365 let inline = CT_Inline::new(&rel_id, width.to_emu(), height.to_emu());
366
367 let drawing = CT_Drawing::inline(inline);
368 let run = CT_R {
369 properties: None,
370 content: vec![RunContent::Drawing(drawing)],
371 extra_xml: Vec::new(),
372 };
373
374 let mut p = CT_P::new();
375 p.runs.push(run);
376 self.document.body.content.push(BodyContent::Paragraph(p));
377 match self.document.body.content.last_mut().unwrap() {
378 BodyContent::Paragraph(p) => Paragraph { inner: p },
379 _ => unreachable!(),
380 }
381 }
382
383 pub fn add_background_image(
390 &mut self,
391 image_data: &[u8],
392 image_filename: &str,
393 ) -> Paragraph<'_> {
394 let rel_id = self.embed_image(image_data, image_filename);
395
396 let sect = self
398 .document
399 .body
400 .sect_pr
401 .as_ref()
402 .cloned()
403 .unwrap_or_else(CT_SectPr::default_letter);
404 let page_width_emu = sect
405 .page_width
406 .unwrap_or(rdocx_oxml::units::Twips(12240))
407 .to_emu()
408 .0;
409 let page_height_emu = sect
410 .page_height
411 .unwrap_or(rdocx_oxml::units::Twips(15840))
412 .to_emu()
413 .0;
414
415 let anchor = CT_Anchor::background(&rel_id, page_width_emu, page_height_emu);
416 let drawing = CT_Drawing::anchor(anchor);
417 let run = CT_R {
418 properties: None,
419 content: vec![RunContent::Drawing(drawing)],
420 extra_xml: Vec::new(),
421 };
422
423 let mut p = CT_P::new();
424 p.runs.push(run);
425 self.document.body.insert_paragraph(0, p);
426 match &mut self.document.body.content[0] {
427 BodyContent::Paragraph(p) => Paragraph { inner: p },
428 _ => unreachable!(),
429 }
430 }
431
432 pub fn add_anchored_image(
437 &mut self,
438 image_data: &[u8],
439 image_filename: &str,
440 width: Length,
441 height: Length,
442 behind_text: bool,
443 ) -> Paragraph<'_> {
444 let rel_id = self.embed_image(image_data, image_filename);
445
446 let mut anchor = CT_Anchor::background(&rel_id, width.to_emu(), height.to_emu());
447 anchor.behind_doc = behind_text;
448
449 let drawing = CT_Drawing::anchor(anchor);
450 let run = CT_R {
451 properties: None,
452 content: vec![RunContent::Drawing(drawing)],
453 extra_xml: Vec::new(),
454 };
455
456 let mut p = CT_P::new();
457 p.runs.push(run);
458 self.document.body.insert_paragraph(0, p);
459 match &mut self.document.body.content[0] {
460 BodyContent::Paragraph(p) => Paragraph { inner: p },
461 _ => unreachable!(),
462 }
463 }
464
465 fn next_image_number(&mut self) -> usize {
467 self.image_counter += 1;
468 self.image_counter
469 }
470
471 fn embed_image(&mut self, image_data: &[u8], filename: &str) -> String {
473 use rdocx_opc::relationship::rel_types;
474
475 let ext = filename.rsplit('.').next().unwrap_or("png").to_lowercase();
477 let content_type = match ext.as_str() {
478 "png" => "image/png",
479 "jpg" | "jpeg" => "image/jpeg",
480 "gif" => "image/gif",
481 "bmp" => "image/bmp",
482 "tiff" | "tif" => "image/tiff",
483 "svg" => "image/svg+xml",
484 _ => "image/png",
485 };
486
487 let image_num = self.next_image_number();
489 let part_name = format!("/word/media/image{image_num}.{ext}");
490
491 self.package.set_part(&part_name, image_data.to_vec());
493
494 self.package.content_types.add_default(&ext, content_type);
496
497 let rel_target = format!("media/image{image_num}.{ext}");
499 let rels = self.package.get_or_create_part_rels(&self.doc_part_name);
500 rels.add(rel_types::IMAGE, &rel_target)
501 }
502
503 pub fn set_header(&mut self, text: &str) {
510 self.set_header_footer_part(text, true, HdrFtrType::Default);
511 }
512
513 pub fn set_footer(&mut self, text: &str) {
515 self.set_header_footer_part(text, false, HdrFtrType::Default);
516 }
517
518 pub fn set_first_page_header(&mut self, text: &str) {
520 self.set_different_first_page(true);
521 self.set_header_footer_part(text, true, HdrFtrType::First);
522 }
523
524 pub fn set_first_page_footer(&mut self, text: &str) {
526 self.set_different_first_page(true);
527 self.set_header_footer_part(text, false, HdrFtrType::First);
528 }
529
530 fn set_header_footer_part(&mut self, text: &str, is_header: bool, hdr_type: HdrFtrType) {
531 use rdocx_opc::relationship::rel_types;
532
533 let mut hdr_ftr = CT_HdrFtr::new();
534 let mut p = CT_P::new();
535 if !text.is_empty() {
536 p.add_run(text);
537 }
538 hdr_ftr.paragraphs.push(p);
539
540 let type_suffix = match hdr_type {
542 HdrFtrType::Default => "",
543 HdrFtrType::First => "First",
544 HdrFtrType::Even => "Even",
545 };
546 let (part_name, rel_type, content_type) = if is_header {
547 (
548 format!("/word/header{type_suffix}1.xml"),
549 rel_types::HEADER,
550 "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
551 )
552 } else {
553 (
554 format!("/word/footer{type_suffix}1.xml"),
555 rel_types::FOOTER,
556 "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
557 )
558 };
559
560 let xml = if is_header {
562 hdr_ftr
563 .to_xml_header()
564 .expect("header serialization failed")
565 } else {
566 hdr_ftr
567 .to_xml_footer()
568 .expect("footer serialization failed")
569 };
570
571 self.package.set_part(&part_name, xml);
572 self.package
573 .content_types
574 .add_override(&part_name, content_type);
575
576 let rel_target = part_name.trim_start_matches("/word/");
578 let rels = self.package.get_or_create_part_rels(&self.doc_part_name);
579 let rel_id = rels.add(rel_type, rel_target);
580
581 let sect = self.section_properties_mut();
583 let refs = if is_header {
584 &mut sect.header_refs
585 } else {
586 &mut sect.footer_refs
587 };
588
589 refs.retain(|r| r.hdr_ftr_type != hdr_type);
591 refs.push(HdrFtrRef {
592 hdr_ftr_type: hdr_type,
593 rel_id,
594 });
595 }
596
597 pub fn header_text(&self) -> Option<String> {
599 self.get_header_footer_text(true, HdrFtrType::Default)
600 }
601
602 pub fn footer_text(&self) -> Option<String> {
604 self.get_header_footer_text(false, HdrFtrType::Default)
605 }
606
607 pub fn set_header_image(
612 &mut self,
613 image_data: &[u8],
614 image_filename: &str,
615 width: Length,
616 height: Length,
617 ) {
618 self.set_header_footer_image_part(
619 image_data,
620 image_filename,
621 width,
622 height,
623 true,
624 HdrFtrType::Default,
625 );
626 }
627
628 pub fn set_footer_image(
630 &mut self,
631 image_data: &[u8],
632 image_filename: &str,
633 width: Length,
634 height: Length,
635 ) {
636 self.set_header_footer_image_part(
637 image_data,
638 image_filename,
639 width,
640 height,
641 false,
642 HdrFtrType::Default,
643 );
644 }
645
646 pub fn set_raw_header_with_images(
657 &mut self,
658 header_xml: Vec<u8>,
659 images: &[(&str, &[u8], &str)],
660 hdr_type: HdrFtrType,
661 ) {
662 self.set_raw_hdr_ftr_with_images(header_xml, images, true, hdr_type);
663 }
664
665 pub fn set_raw_footer_with_images(
667 &mut self,
668 footer_xml: Vec<u8>,
669 images: &[(&str, &[u8], &str)],
670 hdr_type: HdrFtrType,
671 ) {
672 self.set_raw_hdr_ftr_with_images(footer_xml, images, false, hdr_type);
673 }
674
675 fn set_raw_hdr_ftr_with_images(
676 &mut self,
677 xml: Vec<u8>,
678 images: &[(&str, &[u8], &str)],
679 is_header: bool,
680 hdr_type: HdrFtrType,
681 ) {
682 use rdocx_opc::relationship::rel_types;
683
684 let type_suffix = match hdr_type {
685 HdrFtrType::Default => "",
686 HdrFtrType::First => "First",
687 HdrFtrType::Even => "Even",
688 };
689 let (part_name, rel_type, content_type) = if is_header {
690 (
691 format!("/word/header{type_suffix}1.xml"),
692 rel_types::HEADER,
693 "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
694 )
695 } else {
696 (
697 format!("/word/footer{type_suffix}1.xml"),
698 rel_types::FOOTER,
699 "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
700 )
701 };
702
703 self.package.set_part(&part_name, xml);
705 self.package
706 .content_types
707 .add_override(&part_name, content_type);
708
709 for &(rel_id, image_data, image_filename) in images {
711 let ext = image_filename
712 .rsplit('.')
713 .next()
714 .unwrap_or("png")
715 .to_lowercase();
716 let img_content_type = match ext.as_str() {
717 "png" => "image/png",
718 "jpg" | "jpeg" => "image/jpeg",
719 _ => "image/png",
720 };
721
722 let image_num = self.next_image_number();
723 let img_part_name = format!("/word/media/image{image_num}.{ext}");
724 self.package.set_part(&img_part_name, image_data.to_vec());
725 self.package
726 .content_types
727 .add_default(&ext, img_content_type);
728
729 let img_rel_target = format!("media/image{image_num}.{ext}");
731 let hdr_rels = self.package.get_or_create_part_rels(&part_name);
732 hdr_rels.add_with_id(rel_id, rel_types::IMAGE, &img_rel_target);
733 }
734
735 let rel_target = part_name.trim_start_matches("/word/");
737 let rels = self.package.get_or_create_part_rels(&self.doc_part_name);
738 let rel_id = rels.add(rel_type, rel_target);
739
740 let sect = self.section_properties_mut();
742 let refs = if is_header {
743 &mut sect.header_refs
744 } else {
745 &mut sect.footer_refs
746 };
747
748 refs.retain(|r| r.hdr_ftr_type != hdr_type);
749 refs.push(HdrFtrRef {
750 hdr_ftr_type: hdr_type,
751 rel_id,
752 });
753 }
754
755 pub fn set_header_image_with_background(
761 &mut self,
762 image_data: &[u8],
763 image_filename: &str,
764 width: Length,
765 height: Length,
766 bg_color: &str,
767 ) {
768 self.set_header_footer_image_bg_part(
769 image_data,
770 image_filename,
771 width,
772 height,
773 Some(bg_color),
774 true,
775 HdrFtrType::Default,
776 );
777 }
778
779 pub fn set_first_page_header_image(
781 &mut self,
782 image_data: &[u8],
783 image_filename: &str,
784 width: Length,
785 height: Length,
786 ) {
787 self.set_different_first_page(true);
788 self.set_header_footer_image_part(
789 image_data,
790 image_filename,
791 width,
792 height,
793 true,
794 HdrFtrType::First,
795 );
796 }
797
798 fn set_header_footer_image_part(
799 &mut self,
800 image_data: &[u8],
801 image_filename: &str,
802 width: Length,
803 height: Length,
804 is_header: bool,
805 hdr_type: HdrFtrType,
806 ) {
807 use rdocx_opc::relationship::rel_types;
808
809 let type_suffix = match hdr_type {
811 HdrFtrType::Default => "",
812 HdrFtrType::First => "First",
813 HdrFtrType::Even => "Even",
814 };
815 let (part_name, rel_type, content_type) = if is_header {
816 (
817 format!("/word/header{type_suffix}1.xml"),
818 rel_types::HEADER,
819 "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
820 )
821 } else {
822 (
823 format!("/word/footer{type_suffix}1.xml"),
824 rel_types::FOOTER,
825 "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
826 )
827 };
828
829 let ext = image_filename
831 .rsplit('.')
832 .next()
833 .unwrap_or("png")
834 .to_lowercase();
835 let img_content_type = match ext.as_str() {
836 "png" => "image/png",
837 "jpg" | "jpeg" => "image/jpeg",
838 _ => "image/png",
839 };
840
841 let image_num = self.next_image_number();
843 let img_part_name = format!("/word/media/image{image_num}.{ext}");
844 self.package.set_part(&img_part_name, image_data.to_vec());
845 self.package
846 .content_types
847 .add_default(&ext, img_content_type);
848
849 let img_rel_target = format!("media/image{image_num}.{ext}");
851 let hdr_rels = self.package.get_or_create_part_rels(&part_name);
852 let img_rel_id = hdr_rels.add(rel_types::IMAGE, &img_rel_target);
853
854 let inline = CT_Inline::new(&img_rel_id, width.to_emu(), height.to_emu());
856 let drawing = CT_Drawing::inline(inline);
857 let run = CT_R {
858 properties: None,
859 content: vec![RunContent::Drawing(drawing)],
860 extra_xml: Vec::new(),
861 };
862
863 let mut hdr_ftr = CT_HdrFtr::new();
864 let mut p = CT_P::new();
865 p.runs.push(run);
866 hdr_ftr.paragraphs.push(p);
867
868 let xml = if is_header {
870 hdr_ftr
871 .to_xml_header()
872 .expect("header serialization failed")
873 } else {
874 hdr_ftr
875 .to_xml_footer()
876 .expect("footer serialization failed")
877 };
878
879 self.package.set_part(&part_name, xml);
880 self.package
881 .content_types
882 .add_override(&part_name, content_type);
883
884 let rel_target = part_name.trim_start_matches("/word/");
886 let rels = self.package.get_or_create_part_rels(&self.doc_part_name);
887 let rel_id = rels.add(rel_type, rel_target);
888
889 let sect = self.section_properties_mut();
891 let refs = if is_header {
892 &mut sect.header_refs
893 } else {
894 &mut sect.footer_refs
895 };
896
897 refs.retain(|r| r.hdr_ftr_type != hdr_type);
898 refs.push(HdrFtrRef {
899 hdr_ftr_type: hdr_type,
900 rel_id,
901 });
902 }
903
904 fn set_header_footer_image_bg_part(
905 &mut self,
906 image_data: &[u8],
907 image_filename: &str,
908 width: Length,
909 height: Length,
910 bg_color: Option<&str>,
911 is_header: bool,
912 hdr_type: HdrFtrType,
913 ) {
914 use rdocx_opc::relationship::rel_types;
915 use rdocx_oxml::properties::CT_Shd;
916
917 let type_suffix = match hdr_type {
919 HdrFtrType::Default => "",
920 HdrFtrType::First => "First",
921 HdrFtrType::Even => "Even",
922 };
923 let (part_name, rel_type, content_type) = if is_header {
924 (
925 format!("/word/header{type_suffix}1.xml"),
926 rel_types::HEADER,
927 "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
928 )
929 } else {
930 (
931 format!("/word/footer{type_suffix}1.xml"),
932 rel_types::FOOTER,
933 "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
934 )
935 };
936
937 let ext = image_filename
939 .rsplit('.')
940 .next()
941 .unwrap_or("png")
942 .to_lowercase();
943 let img_content_type = match ext.as_str() {
944 "png" => "image/png",
945 "jpg" | "jpeg" => "image/jpeg",
946 _ => "image/png",
947 };
948
949 let image_num = self.next_image_number();
950 let img_part_name = format!("/word/media/image{image_num}.{ext}");
951 self.package.set_part(&img_part_name, image_data.to_vec());
952 self.package
953 .content_types
954 .add_default(&ext, img_content_type);
955
956 let img_rel_target = format!("media/image{image_num}.{ext}");
958 let hdr_rels = self.package.get_or_create_part_rels(&part_name);
959 let img_rel_id = hdr_rels.add(rel_types::IMAGE, &img_rel_target);
960
961 let inline = CT_Inline::new(&img_rel_id, width.to_emu(), height.to_emu());
963 let drawing = CT_Drawing::inline(inline);
964 let run = CT_R {
965 properties: None,
966 content: vec![RunContent::Drawing(drawing)],
967 extra_xml: Vec::new(),
968 };
969
970 let mut hdr_ftr = CT_HdrFtr::new();
971 let mut p = CT_P::new();
972 p.runs.push(run);
973
974 if let Some(color) = bg_color {
976 let ppr = CT_PPr {
977 shading: Some(CT_Shd {
978 val: "clear".to_string(),
979 color: Some("auto".to_string()),
980 fill: Some(color.to_string()),
981 }),
982 ..Default::default()
983 };
984 p.properties = Some(ppr);
985 }
986
987 hdr_ftr.paragraphs.push(p);
988
989 let xml = if is_header {
991 hdr_ftr
992 .to_xml_header()
993 .expect("header serialization failed")
994 } else {
995 hdr_ftr
996 .to_xml_footer()
997 .expect("footer serialization failed")
998 };
999
1000 self.package.set_part(&part_name, xml);
1001 self.package
1002 .content_types
1003 .add_override(&part_name, content_type);
1004
1005 let rel_target = part_name.trim_start_matches("/word/");
1007 let rels = self.package.get_or_create_part_rels(&self.doc_part_name);
1008 let rel_id = rels.add(rel_type, rel_target);
1009
1010 let sect = self.section_properties_mut();
1012 let refs = if is_header {
1013 &mut sect.header_refs
1014 } else {
1015 &mut sect.footer_refs
1016 };
1017
1018 refs.retain(|r| r.hdr_ftr_type != hdr_type);
1019 refs.push(HdrFtrRef {
1020 hdr_ftr_type: hdr_type,
1021 rel_id,
1022 });
1023 }
1024
1025 fn get_header_footer_text(&self, is_header: bool, hdr_type: HdrFtrType) -> Option<String> {
1026 let sect = self.document.body.sect_pr.as_ref()?;
1027 let refs = if is_header {
1028 §.header_refs
1029 } else {
1030 §.footer_refs
1031 };
1032 let hdr_ref = refs.iter().find(|r| r.hdr_ftr_type == hdr_type)?;
1033
1034 let rels = self.package.get_part_rels(&self.doc_part_name)?;
1036 let rel = rels.get_by_id(&hdr_ref.rel_id)?;
1037 let part_name = OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
1038 let xml = self.package.get_part(&part_name)?;
1039 let hdr_ftr = CT_HdrFtr::from_xml(xml).ok()?;
1040 Some(hdr_ftr.text())
1041 }
1042
1043 fn ensure_numbering(&mut self) -> &mut CT_Numbering {
1047 if self.numbering.is_none() {
1048 self.numbering = Some(CT_Numbering::new());
1049
1050 self.package
1052 .get_or_create_part_rels(&self.doc_part_name)
1053 .add(rel_types::NUMBERING, "numbering.xml");
1054 self.package.content_types.add_override(
1055 "/word/numbering.xml",
1056 "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
1057 );
1058 }
1059 self.numbering.as_mut().unwrap()
1060 }
1061
1062 pub fn add_bullet_list_item(&mut self, text: &str, level: u32) -> Paragraph<'_> {
1067 let num_id = {
1069 let numbering = self.ensure_numbering();
1070 let existing = numbering.nums.iter().find(|n| {
1072 numbering
1073 .get_abstract_num_for(n.num_id)
1074 .map(|a| {
1075 a.levels.first().and_then(|l| l.num_fmt)
1076 == Some(rdocx_oxml::numbering::ST_NumberFormat::Bullet)
1077 })
1078 .unwrap_or(false)
1079 });
1080 if let Some(existing) = existing {
1081 existing.num_id
1082 } else {
1083 numbering.add_bullet_list()
1084 }
1085 };
1086
1087 let mut p = CT_P::new();
1088 if !text.is_empty() {
1089 p.add_run(text);
1090 }
1091 let ppr = CT_PPr {
1092 num_id: Some(num_id),
1093 num_ilvl: Some(level),
1094 ..Default::default()
1095 };
1096 p.properties = Some(ppr);
1097
1098 self.document.body.content.push(BodyContent::Paragraph(p));
1099 match self.document.body.content.last_mut().unwrap() {
1100 BodyContent::Paragraph(p) => Paragraph { inner: p },
1101 _ => unreachable!(),
1102 }
1103 }
1104
1105 pub fn add_numbered_list_item(&mut self, text: &str, level: u32) -> Paragraph<'_> {
1110 let num_id = {
1112 let numbering = self.ensure_numbering();
1113 let existing = numbering.nums.iter().find(|n| {
1115 numbering
1116 .get_abstract_num_for(n.num_id)
1117 .map(|a| {
1118 a.levels.first().and_then(|l| l.num_fmt)
1119 == Some(rdocx_oxml::numbering::ST_NumberFormat::Decimal)
1120 })
1121 .unwrap_or(false)
1122 });
1123 if let Some(existing) = existing {
1124 existing.num_id
1125 } else {
1126 numbering.add_numbered_list()
1127 }
1128 };
1129
1130 let mut p = CT_P::new();
1131 if !text.is_empty() {
1132 p.add_run(text);
1133 }
1134 let ppr = CT_PPr {
1135 num_id: Some(num_id),
1136 num_ilvl: Some(level),
1137 ..Default::default()
1138 };
1139 p.properties = Some(ppr);
1140
1141 self.document.body.content.push(BodyContent::Paragraph(p));
1142 match self.document.body.content.last_mut().unwrap() {
1143 BodyContent::Paragraph(p) => Paragraph { inner: p },
1144 _ => unreachable!(),
1145 }
1146 }
1147
1148 pub fn styles(&self) -> Vec<Style<'_>> {
1152 self.styles
1153 .styles
1154 .iter()
1155 .map(|s| Style { inner: s })
1156 .collect()
1157 }
1158
1159 pub fn style(&self, style_id: &str) -> Option<Style<'_>> {
1161 self.styles.get_by_id(style_id).map(|s| Style { inner: s })
1162 }
1163
1164 pub fn add_style(&mut self, builder: StyleBuilder) {
1168 self.styles.styles.push(builder.build());
1169 }
1170
1171 pub fn resolve_paragraph_properties(&self, style_id: Option<&str>) -> CT_PPr {
1174 style::resolve_paragraph_properties(style_id, &self.styles)
1175 }
1176
1177 pub fn resolve_run_properties(
1180 &self,
1181 para_style_id: Option<&str>,
1182 run_style_id: Option<&str>,
1183 ) -> CT_RPr {
1184 style::resolve_run_properties(para_style_id, run_style_id, &self.styles)
1185 }
1186
1187 pub fn section_properties(&self) -> Option<&CT_SectPr> {
1191 self.document.body.sect_pr.as_ref()
1192 }
1193
1194 pub fn section_properties_mut(&mut self) -> &mut CT_SectPr {
1196 self.document
1197 .body
1198 .sect_pr
1199 .get_or_insert_with(CT_SectPr::default_letter)
1200 }
1201
1202 pub fn set_page_size(&mut self, width: Length, height: Length) {
1204 let sect = self.section_properties_mut();
1205 sect.page_width = Some(width.as_twips());
1206 sect.page_height = Some(height.as_twips());
1207 }
1208
1209 pub fn set_landscape(&mut self) {
1211 let sect = self.section_properties_mut();
1212 sect.orientation = Some(ST_PageOrientation::Landscape);
1213 if let (Some(w), Some(h)) = (sect.page_width, sect.page_height)
1215 && w.0 < h.0
1216 {
1217 sect.page_width = Some(h);
1218 sect.page_height = Some(w);
1219 }
1220 }
1221
1222 pub fn set_portrait(&mut self) {
1224 let sect = self.section_properties_mut();
1225 sect.orientation = Some(ST_PageOrientation::Portrait);
1226 if let (Some(w), Some(h)) = (sect.page_width, sect.page_height)
1228 && w.0 > h.0
1229 {
1230 sect.page_width = Some(h);
1231 sect.page_height = Some(w);
1232 }
1233 }
1234
1235 pub fn set_margins(&mut self, top: Length, right: Length, bottom: Length, left: Length) {
1237 let sect = self.section_properties_mut();
1238 sect.margin_top = Some(top.as_twips());
1239 sect.margin_right = Some(right.as_twips());
1240 sect.margin_bottom = Some(bottom.as_twips());
1241 sect.margin_left = Some(left.as_twips());
1242 }
1243
1244 pub fn set_columns(&mut self, num: u32, spacing: Length) {
1246 let sect = self.section_properties_mut();
1247 sect.columns = Some(CT_Columns {
1248 num: Some(num),
1249 space: Some(spacing.as_twips()),
1250 equal_width: Some(true),
1251 sep: None,
1252 columns: Vec::new(),
1253 });
1254 }
1255
1256 pub fn set_header_footer_distance(&mut self, header: Length, footer: Length) {
1258 let sect = self.section_properties_mut();
1259 sect.header_distance = Some(header.as_twips());
1260 sect.footer_distance = Some(footer.as_twips());
1261 }
1262
1263 pub fn set_gutter(&mut self, gutter: Length) {
1265 self.section_properties_mut().gutter = Some(gutter.as_twips());
1266 }
1267
1268 pub fn set_different_first_page(&mut self, val: bool) {
1270 self.section_properties_mut().title_pg = Some(val);
1271 }
1272
1273 pub fn title(&self) -> Option<&str> {
1277 self.core_properties.as_ref()?.title.as_deref()
1278 }
1279
1280 pub fn set_title(&mut self, title: &str) {
1282 self.ensure_core_properties().title = Some(title.to_string());
1283 }
1284
1285 pub fn author(&self) -> Option<&str> {
1287 self.core_properties.as_ref()?.creator.as_deref()
1288 }
1289
1290 pub fn set_author(&mut self, author: &str) {
1292 self.ensure_core_properties().creator = Some(author.to_string());
1293 }
1294
1295 pub fn subject(&self) -> Option<&str> {
1297 self.core_properties.as_ref()?.subject.as_deref()
1298 }
1299
1300 pub fn set_subject(&mut self, subject: &str) {
1302 self.ensure_core_properties().subject = Some(subject.to_string());
1303 }
1304
1305 pub fn keywords(&self) -> Option<&str> {
1307 self.core_properties.as_ref()?.keywords.as_deref()
1308 }
1309
1310 pub fn set_keywords(&mut self, keywords: &str) {
1312 self.ensure_core_properties().keywords = Some(keywords.to_string());
1313 }
1314
1315 fn ensure_core_properties(&mut self) -> &mut CoreProperties {
1316 self.core_properties
1317 .get_or_insert_with(CoreProperties::default)
1318 }
1319
1320 pub fn append(&mut self, other: &Document) {
1327 self.merge_styles(other);
1328
1329 let start_idx = self.document.body.content.len();
1330 for content in &other.document.body.content {
1331 self.document.body.content.push(content.clone());
1332 }
1333
1334 self.remap_merged_numbering(other, start_idx);
1335 }
1336
1337 pub fn append_with_break(&mut self, other: &Document, break_type: crate::SectionBreak) {
1339 let mut p = CT_P::new();
1341 let sect_pr = match break_type {
1342 crate::SectionBreak::NextPage => CT_SectPr::default_letter(),
1343 crate::SectionBreak::Continuous => {
1344 let mut sp = CT_SectPr::default_letter();
1345 sp.section_type = Some(ST_SectionType::Continuous);
1346 sp
1347 }
1348 crate::SectionBreak::EvenPage => {
1349 let mut sp = CT_SectPr::default_letter();
1350 sp.section_type = Some(ST_SectionType::EvenPage);
1351 sp
1352 }
1353 crate::SectionBreak::OddPage => {
1354 let mut sp = CT_SectPr::default_letter();
1355 sp.section_type = Some(ST_SectionType::OddPage);
1356 sp
1357 }
1358 };
1359 p.properties = Some(CT_PPr {
1360 sect_pr: Some(sect_pr),
1361 ..Default::default()
1362 });
1363 self.document.body.content.push(BodyContent::Paragraph(p));
1364
1365 self.append(other);
1366 }
1367
1368 pub fn insert_document(&mut self, index: usize, other: &Document) {
1370 self.merge_styles(other);
1371
1372 let insert_at = index.min(self.document.body.content.len());
1373 for (i, content) in other.document.body.content.iter().enumerate() {
1374 self.document
1375 .body
1376 .content
1377 .insert(insert_at + i, content.clone());
1378 }
1379
1380 self.remap_merged_numbering(other, insert_at);
1381 }
1382
1383 fn merge_styles(&mut self, other: &Document) {
1385 for style in &other.styles.styles {
1386 if self.styles.get_by_id(&style.style_id).is_none() {
1387 self.styles.styles.push(style.clone());
1388 }
1389 }
1390 }
1391
1392 fn remap_merged_numbering(&mut self, other: &Document, start_idx: usize) {
1395 let Some(other_numbering) = &other.numbering else {
1396 return;
1397 };
1398
1399 let numbering = self
1400 .numbering
1401 .get_or_insert_with(|| rdocx_oxml::numbering::CT_Numbering {
1402 abstract_nums: Vec::new(),
1403 nums: Vec::new(),
1404 });
1405
1406 let max_abstract_id = numbering
1408 .abstract_nums
1409 .iter()
1410 .map(|a| a.abstract_num_id)
1411 .max()
1412 .unwrap_or(0);
1413 let max_num_id = numbering.nums.iter().map(|n| n.num_id).max().unwrap_or(0);
1414
1415 let abstract_offset = max_abstract_id + 1;
1416 let num_offset = max_num_id + 1;
1417
1418 for abs_num in &other_numbering.abstract_nums {
1420 let mut new_abs = abs_num.clone();
1421 new_abs.abstract_num_id += abstract_offset;
1422 numbering.abstract_nums.push(new_abs);
1423 }
1424
1425 for num in &other_numbering.nums {
1427 let mut new_num = num.clone();
1428 new_num.num_id += num_offset;
1429 new_num.abstract_num_id += abstract_offset;
1430 numbering.nums.push(new_num);
1431 }
1432
1433 let incoming_count = other.document.body.content.len();
1435 for content in self.document.body.content[start_idx..start_idx + incoming_count].iter_mut()
1436 {
1437 Self::remap_num_ids(content, num_offset);
1438 }
1439 }
1440
1441 fn remap_num_ids(content: &mut BodyContent, offset: u32) {
1443 match content {
1444 BodyContent::Paragraph(p) => {
1445 Self::remap_paragraph_num_id(p, offset);
1446 }
1447 BodyContent::Table(tbl) => {
1448 Self::remap_table_num_ids(tbl, offset);
1449 }
1450 BodyContent::RawXml(_) => {}
1451 }
1452 }
1453
1454 fn remap_paragraph_num_id(p: &mut CT_P, offset: u32) {
1455 if let Some(ppr) = &mut p.properties
1456 && let Some(num_id) = &mut ppr.num_id
1457 && *num_id > 0
1458 {
1459 *num_id += offset;
1460 }
1461 }
1462
1463 fn remap_table_num_ids(tbl: &mut CT_Tbl, offset: u32) {
1464 for row in &mut tbl.rows {
1465 for cell in &mut row.cells {
1466 for cc in &mut cell.content {
1467 match cc {
1468 rdocx_oxml::table::CellContent::Paragraph(p) => {
1469 Self::remap_paragraph_num_id(p, offset);
1470 }
1471 rdocx_oxml::table::CellContent::Table(nested) => {
1472 Self::remap_table_num_ids(nested, offset);
1473 }
1474 }
1475 }
1476 }
1477 }
1478 }
1479
1480 pub fn insert_toc(&mut self, index: usize, max_level: u32) {
1492 use rdocx_oxml::borders::{CT_TabStop, CT_Tabs};
1493 use rdocx_oxml::shared::{ST_TabJc, ST_TabLeader};
1494 use rdocx_oxml::text::HyperlinkSpan;
1495 use rdocx_oxml::units::Twips;
1496
1497 let max_level = max_level.clamp(1, 9);
1498
1499 struct HeadingInfo {
1501 content_index: usize,
1502 level: u32,
1503 text: String,
1504 bookmark_name: String,
1505 }
1506
1507 let mut headings = Vec::new();
1508 let mut toc_counter = 0u32;
1509
1510 for (idx, content) in self.document.body.content.iter().enumerate() {
1511 if let BodyContent::Paragraph(p) = content
1512 && let Some(level) = Self::detect_heading_level_for_toc(p)
1513 && level <= max_level
1514 {
1515 let text = p.text();
1516 if !text.trim().is_empty() {
1517 toc_counter += 1;
1518 headings.push(HeadingInfo {
1519 content_index: idx,
1520 level,
1521 text,
1522 bookmark_name: format!("_Toc{toc_counter}"),
1523 });
1524 }
1525 }
1526 }
1527
1528 let mut bookmark_id = 100; for heading in &headings {
1533 if let Some(BodyContent::Paragraph(p)) =
1534 self.document.body.content.get_mut(heading.content_index)
1535 {
1536 let bm_start = format!(
1537 "<w:bookmarkStart w:id=\"{bookmark_id}\" w:name=\"{}\"/>",
1538 heading.bookmark_name
1539 );
1540 let bm_end = format!("<w:bookmarkEnd w:id=\"{bookmark_id}\"/>");
1541 p.extra_xml.push((0, bm_start.into_bytes()));
1543 p.extra_xml.push((p.runs.len(), bm_end.into_bytes()));
1545 bookmark_id += 1;
1546 }
1547 }
1548
1549 let right_tab = CT_Tabs {
1552 tabs: vec![CT_TabStop {
1553 val: ST_TabJc::Right,
1554 pos: Twips(9360),
1555 leader: Some(ST_TabLeader::Dot),
1556 }],
1557 };
1558
1559 let mut toc_paragraphs: Vec<CT_P> = Vec::new();
1560
1561 let mut title_p = CT_P::new();
1563 let mut title_r = CT_R::new("Table of Contents");
1564 title_r.properties = Some(CT_RPr {
1565 bold: Some(true),
1566 ..Default::default()
1567 });
1568 title_p.runs.push(title_r);
1569 title_p.properties = Some(CT_PPr {
1570 space_after: Some(Twips(120)),
1571 ..Default::default()
1572 });
1573 toc_paragraphs.push(title_p);
1574
1575 for heading in &headings {
1576 let mut p = CT_P::new();
1577
1578 let indent = Twips(360 * (heading.level as i32 - 1));
1580
1581 p.properties = Some(CT_PPr {
1582 tabs: Some(right_tab.clone()),
1583 ind_left: if indent.0 > 0 { Some(indent) } else { None },
1584 ..Default::default()
1585 });
1586
1587 let text_run = CT_R::new(&heading.text);
1589 p.runs.push(text_run);
1590
1591 p.runs.push(CT_R {
1593 properties: None,
1594 content: vec![rdocx_oxml::text::RunContent::Tab],
1595 extra_xml: Vec::new(),
1596 });
1597
1598 p.hyperlinks.push(HyperlinkSpan {
1600 rel_id: None,
1601 anchor: Some(heading.bookmark_name.clone()),
1602 run_start: 0,
1603 run_end: 1, });
1605
1606 toc_paragraphs.push(p);
1607 }
1608
1609 let insert_at = index.min(self.document.body.content.len());
1611 for (i, p) in toc_paragraphs.into_iter().enumerate() {
1612 self.document
1613 .body
1614 .content
1615 .insert(insert_at + i, BodyContent::Paragraph(p));
1616 }
1617 }
1618
1619 fn detect_heading_level_for_toc(para: &CT_P) -> Option<u32> {
1621 let ppr = para.properties.as_ref()?;
1622 let style_id = ppr.style_id.as_deref()?;
1623 let rest = style_id.strip_prefix("Heading")?;
1624 rest.parse::<u32>().ok().filter(|n| (1..=9).contains(n))
1625 }
1626
1627 pub fn replace_text(&mut self, placeholder: &str, replacement: &str) -> usize {
1635 use rdocx_oxml::placeholder;
1636
1637 let mut count = 0;
1638
1639 for content in &mut self.document.body.content {
1641 match content {
1642 BodyContent::Paragraph(p) => {
1643 count += placeholder::replace_in_paragraph(p, placeholder, replacement);
1644 }
1645 BodyContent::Table(t) => {
1646 count += placeholder::replace_in_table(t, placeholder, replacement);
1647 }
1648 _ => {} }
1650 }
1651
1652 if let Some(sect_pr) = self.document.body.sect_pr.as_ref() {
1654 let hdr_rel_ids: Vec<String> = sect_pr
1655 .header_refs
1656 .iter()
1657 .map(|r| r.rel_id.clone())
1658 .collect();
1659 let ftr_rel_ids: Vec<String> = sect_pr
1660 .footer_refs
1661 .iter()
1662 .map(|r| r.rel_id.clone())
1663 .collect();
1664
1665 for rel_id in hdr_rel_ids {
1666 if let Some(mut hf) = self.load_header_footer(&rel_id) {
1667 let n =
1668 placeholder::replace_in_header_footer(&mut hf, placeholder, replacement);
1669 if n > 0 {
1670 self.save_header_footer(&rel_id, &hf, true);
1671 count += n;
1672 }
1673 }
1674 }
1675 for rel_id in ftr_rel_ids {
1676 if let Some(mut hf) = self.load_header_footer(&rel_id) {
1677 let n =
1678 placeholder::replace_in_header_footer(&mut hf, placeholder, replacement);
1679 if n > 0 {
1680 self.save_header_footer(&rel_id, &hf, false);
1681 count += n;
1682 }
1683 }
1684 }
1685 }
1686
1687 if let Ok(()) = self.flush_to_package() {
1689 count += self.replace_in_xml_parts(placeholder, replacement);
1690 }
1691
1692 count
1693 }
1694
1695 pub fn replace_all(&mut self, replacements: &std::collections::HashMap<&str, &str>) -> usize {
1697 let mut count = 0;
1698 for (placeholder, replacement) in replacements {
1699 count += self.replace_text(placeholder, replacement);
1700 }
1701 count
1702 }
1703
1704 pub fn replace_regex(&mut self, pattern: &str, replacement: &str) -> Result<usize> {
1712 let re =
1713 regex::Regex::new(pattern).map_err(|e| Error::Other(format!("invalid regex: {e}")))?;
1714 Ok(self.replace_regex_compiled(&re, replacement))
1715 }
1716
1717 pub fn replace_all_regex(&mut self, patterns: &[(String, String)]) -> Result<usize> {
1719 let mut count = 0;
1720 for (pattern, replacement) in patterns {
1721 count += self.replace_regex(pattern, replacement)?;
1722 }
1723 Ok(count)
1724 }
1725
1726 fn replace_regex_compiled(&mut self, re: ®ex::Regex, replacement: &str) -> usize {
1728 use rdocx_oxml::placeholder;
1729
1730 let mut count = 0;
1731
1732 for content in &mut self.document.body.content {
1734 match content {
1735 BodyContent::Paragraph(p) => {
1736 count += placeholder::replace_regex_in_paragraph(p, re, replacement);
1737 }
1738 BodyContent::Table(t) => {
1739 count += placeholder::replace_regex_in_table(t, re, replacement);
1740 }
1741 _ => {}
1742 }
1743 }
1744
1745 if let Some(sect_pr) = self.document.body.sect_pr.as_ref() {
1747 let hdr_rel_ids: Vec<String> = sect_pr
1748 .header_refs
1749 .iter()
1750 .map(|r| r.rel_id.clone())
1751 .collect();
1752 let ftr_rel_ids: Vec<String> = sect_pr
1753 .footer_refs
1754 .iter()
1755 .map(|r| r.rel_id.clone())
1756 .collect();
1757
1758 for rel_id in hdr_rel_ids {
1759 if let Some(mut hf) = self.load_header_footer(&rel_id) {
1760 let n = placeholder::replace_regex_in_header_footer(&mut hf, re, replacement);
1761 if n > 0 {
1762 self.save_header_footer(&rel_id, &hf, true);
1763 count += n;
1764 }
1765 }
1766 }
1767 for rel_id in ftr_rel_ids {
1768 if let Some(mut hf) = self.load_header_footer(&rel_id) {
1769 let n = placeholder::replace_regex_in_header_footer(&mut hf, re, replacement);
1770 if n > 0 {
1771 self.save_header_footer(&rel_id, &hf, false);
1772 count += n;
1773 }
1774 }
1775 }
1776 }
1777
1778 count
1779 }
1780
1781 fn load_header_footer(&self, rel_id: &str) -> Option<CT_HdrFtr> {
1783 let rels = self.package.get_part_rels(&self.doc_part_name)?;
1784 let rel = rels.get_by_id(rel_id)?;
1785 let part_name = OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
1786 let xml = self.package.get_part(&part_name)?;
1787 CT_HdrFtr::from_xml(xml).ok()
1788 }
1789
1790 fn replace_in_xml_parts(&mut self, placeholder: &str, replacement: &str) -> usize {
1794 use rdocx_oxml::placeholder::{replace_in_chart_xml, replace_in_xml_part};
1795
1796 let mut count = 0;
1797
1798 let mut xml_parts: Vec<String> = vec![self.doc_part_name.clone()];
1800 if let Some(sect_pr) = self.document.body.sect_pr.as_ref()
1801 && let Some(rels) = self.package.get_part_rels(&self.doc_part_name)
1802 {
1803 for href in §_pr.header_refs {
1804 if let Some(rel) = rels.get_by_id(&href.rel_id) {
1805 xml_parts.push(OpcPackage::resolve_rel_target(
1806 &self.doc_part_name,
1807 &rel.target,
1808 ));
1809 }
1810 }
1811 for fref in §_pr.footer_refs {
1812 if let Some(rel) = rels.get_by_id(&fref.rel_id) {
1813 xml_parts.push(OpcPackage::resolve_rel_target(
1814 &self.doc_part_name,
1815 &rel.target,
1816 ));
1817 }
1818 }
1819 }
1820
1821 for part_name in xml_parts {
1822 if let Some(xml) = self.package.get_part(&part_name) {
1823 let xml = xml.to_vec();
1824 if let Ok((new_xml, n)) = replace_in_xml_part(&xml, placeholder, replacement)
1825 && n > 0
1826 {
1827 self.package.set_part(&part_name, new_xml);
1828 count += n;
1829 }
1830 }
1831 }
1832
1833 let chart_parts: Vec<String> = self
1835 .package
1836 .get_part_rels(&self.doc_part_name)
1837 .map(|rels| {
1838 rels.get_all_by_type(rel_types::CHART)
1839 .iter()
1840 .map(|rel| OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target))
1841 .collect()
1842 })
1843 .unwrap_or_default();
1844
1845 for part_name in chart_parts {
1846 if let Some(xml) = self.package.get_part(&part_name) {
1847 let xml = xml.to_vec();
1848 if let Ok((new_xml, n)) = replace_in_chart_xml(&xml, placeholder, replacement)
1849 && n > 0
1850 {
1851 self.package.set_part(&part_name, new_xml);
1852 count += n;
1853 }
1854 }
1855 }
1856
1857 if count > 0
1859 && let Some(doc_xml) = self.package.get_part(&self.doc_part_name)
1860 && let Ok(doc) = CT_Document::from_xml(doc_xml)
1861 {
1862 self.document = doc;
1863 }
1864
1865 count
1866 }
1867
1868 pub fn to_pdf(&self) -> Result<Vec<u8>> {
1880 self.to_pdf_with_fonts(&[])
1881 }
1882
1883 pub fn to_pdf_with_fonts(&self, font_files: &[(&str, &[u8])]) -> Result<Vec<u8>> {
1896 let mut input = self.build_layout_input();
1897 for (family, data) in font_files {
1898 input.fonts.push(rdocx_layout::FontFile {
1899 family: family.to_string(),
1900 data: data.to_vec(),
1901 });
1902 }
1903 let layout = rdocx_layout::layout_document(&input)?;
1904 Ok(rdocx_pdf::render_to_pdf(&layout))
1905 }
1906
1907 pub fn save_pdf<P: AsRef<Path>>(&self, path: P) -> Result<()> {
1909 let pdf_bytes = self.to_pdf()?;
1910 std::fs::write(path, pdf_bytes)?;
1911 Ok(())
1912 }
1913
1914 pub fn to_html(&self) -> String {
1916 let input = self.build_html_input();
1917 rdocx_html::to_html_document(&input, &rdocx_html::HtmlOptions::default())
1918 }
1919
1920 pub fn to_html_fragment(&self) -> String {
1922 let input = self.build_html_input();
1923 rdocx_html::to_html_fragment(&input, &rdocx_html::HtmlOptions::default())
1924 }
1925
1926 pub fn to_markdown(&self) -> String {
1928 let input = self.build_html_input();
1929 rdocx_html::to_markdown(&input)
1930 }
1931
1932 fn build_html_input(&self) -> rdocx_html::HtmlInput {
1934 use rdocx_opc::relationship::rel_types;
1935 use std::collections::HashMap;
1936
1937 let mut images: HashMap<String, rdocx_html::ImageData> = HashMap::new();
1938 let mut hyperlink_urls: HashMap<String, String> = HashMap::new();
1939
1940 if let Some(rels) = self.package.get_part_rels(&self.doc_part_name) {
1941 for rel in &rels.items {
1942 match rel.rel_type.as_str() {
1943 t if t == rel_types::IMAGE => {
1944 let part_name =
1945 OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
1946 if let Some(data) = self.package.get_part(&part_name) {
1947 let content_type = guess_image_content_type(&part_name);
1948 images.insert(
1949 rel.id.clone(),
1950 rdocx_html::ImageData {
1951 data: data.to_vec(),
1952 content_type,
1953 },
1954 );
1955 }
1956 }
1957 t if t == rel_types::HYPERLINK => {
1958 if rel.target_mode.as_ref().is_some_and(|m| m == "External") {
1959 hyperlink_urls.insert(rel.id.clone(), rel.target.clone());
1960 }
1961 }
1962 _ => {}
1963 }
1964 }
1965 }
1966
1967 rdocx_html::HtmlInput {
1968 document: self.document.clone(),
1969 styles: self.styles.clone(),
1970 numbering: self.numbering.clone(),
1971 images,
1972 hyperlink_urls,
1973 }
1974 }
1975
1976 pub fn render_page_to_png(&self, page_index: usize, dpi: f64) -> Result<Option<Vec<u8>>> {
1982 let input = self.build_layout_input();
1983 let layout = rdocx_layout::layout_document(&input)?;
1984 Ok(rdocx_pdf::render_page_to_png(&layout, page_index, dpi))
1985 }
1986
1987 pub fn render_all_pages(&self, dpi: f64) -> Result<Vec<Vec<u8>>> {
1989 let input = self.build_layout_input();
1990 let layout = rdocx_layout::layout_document(&input)?;
1991 Ok(rdocx_pdf::render_all_pages(&layout, dpi))
1992 }
1993
1994 fn build_layout_input(&self) -> rdocx_layout::LayoutInput {
1996 use rdocx_layout::{ImageData, LayoutInput};
1997 use rdocx_opc::relationship::rel_types;
1998 use std::collections::HashMap;
1999
2000 let mut headers: HashMap<String, CT_HdrFtr> = HashMap::new();
2001 let mut footers: HashMap<String, CT_HdrFtr> = HashMap::new();
2002 let mut images: HashMap<String, ImageData> = HashMap::new();
2003 let mut hyperlink_urls: HashMap<String, String> = HashMap::new();
2004 let mut footnotes = None;
2005 let mut endnotes = None;
2006
2007 let fonts = self.extract_embedded_fonts();
2009
2010 if let Some(rels) = self.package.get_part_rels(&self.doc_part_name) {
2011 for rel in &rels.items {
2012 match rel.rel_type.as_str() {
2013 t if t == rel_types::HEADER => {
2014 let part_name =
2015 OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
2016 if let Some(xml) = self.package.get_part(&part_name)
2017 && let Ok(hf) = CT_HdrFtr::from_xml(xml)
2018 {
2019 headers.insert(rel.id.clone(), hf);
2020 }
2021 }
2022 t if t == rel_types::FOOTER => {
2023 let part_name =
2024 OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
2025 if let Some(xml) = self.package.get_part(&part_name)
2026 && let Ok(hf) = CT_HdrFtr::from_xml(xml)
2027 {
2028 footers.insert(rel.id.clone(), hf);
2029 }
2030 }
2031 t if t == rel_types::IMAGE => {
2032 let part_name =
2033 OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
2034 if let Some(data) = self.package.get_part(&part_name) {
2035 let content_type = guess_image_content_type(&part_name);
2036 images.insert(
2037 rel.id.clone(),
2038 ImageData {
2039 data: data.to_vec(),
2040 content_type,
2041 },
2042 );
2043 }
2044 }
2045 t if t == rel_types::HYPERLINK => {
2046 if rel.target_mode.as_ref().is_some_and(|m| m == "External") {
2047 hyperlink_urls.insert(rel.id.clone(), rel.target.clone());
2048 }
2049 }
2050 t if t == rel_types::FOOTNOTES => {
2051 let part_name =
2052 OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
2053 if let Some(xml) = self.package.get_part(&part_name) {
2054 footnotes = rdocx_oxml::footnotes::CT_Footnotes::from_xml(xml).ok();
2055 }
2056 }
2057 t if t == rel_types::ENDNOTES => {
2058 let part_name =
2059 OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target);
2060 if let Some(xml) = self.package.get_part(&part_name) {
2061 endnotes = rdocx_oxml::footnotes::CT_Footnotes::from_xml(xml).ok();
2062 }
2063 }
2064 _ => {}
2065 }
2066 }
2067 }
2068
2069 let theme = self
2071 .package
2072 .get_part("/word/theme/theme1.xml")
2073 .and_then(|data| rdocx_oxml::theme::Theme::from_xml(data).ok());
2074
2075 LayoutInput {
2076 document: self.document.clone(),
2077 styles: self.styles.clone(),
2078 numbering: self.numbering.clone(),
2079 headers,
2080 footers,
2081 images,
2082 core_properties: self.core_properties.clone(),
2083 hyperlink_urls,
2084 footnotes,
2085 endnotes,
2086 theme,
2087 fonts,
2088 }
2089 }
2090
2091 fn extract_embedded_fonts(&self) -> Vec<rdocx_layout::FontFile> {
2097 let mut fonts = Vec::new();
2098
2099 for (part_name, data) in &self.package.parts {
2101 let lower = part_name.to_lowercase();
2102 if !lower.contains("/word/fonts/") && !lower.contains("/word/font") {
2103 continue;
2104 }
2105
2106 let file_name = part_name.rsplit('/').next().unwrap_or(part_name);
2108 let family = file_name.split('.').next().unwrap_or(file_name).to_string();
2109
2110 if lower.ends_with(".odttf") {
2111 if let Some(deobfuscated) = deobfuscate_odttf(data, file_name) {
2113 fonts.push(rdocx_layout::FontFile {
2114 family,
2115 data: deobfuscated,
2116 });
2117 }
2118 } else if lower.ends_with(".ttf") || lower.ends_with(".otf") || lower.ends_with(".ttc")
2119 {
2120 fonts.push(rdocx_layout::FontFile {
2121 family,
2122 data: data.clone(),
2123 });
2124 }
2125 }
2126
2127 fonts
2128 }
2129
2130 pub fn load_fonts_from_dir<P: AsRef<Path>>(dir: P) -> Vec<rdocx_layout::FontFile> {
2135 let mut fonts = Vec::new();
2136 let dir = dir.as_ref();
2137 if let Ok(entries) = std::fs::read_dir(dir) {
2138 for entry in entries.flatten() {
2139 let path = entry.path();
2140 let ext = path
2141 .extension()
2142 .and_then(|e| e.to_str())
2143 .unwrap_or("")
2144 .to_lowercase();
2145 if (ext == "ttf" || ext == "otf" || ext == "ttc")
2146 && let Ok(data) = std::fs::read(&path)
2147 {
2148 let family = path
2149 .file_stem()
2150 .and_then(|s| s.to_str())
2151 .unwrap_or("Unknown")
2152 .to_string();
2153 fonts.push(rdocx_layout::FontFile { family, data });
2154 }
2155 }
2156 }
2157 fonts
2158 }
2159
2160 fn save_header_footer(&mut self, rel_id: &str, hf: &CT_HdrFtr, is_header: bool) {
2162 let part_name = {
2163 let rels = self.package.get_part_rels(&self.doc_part_name);
2164 rels.and_then(|r| r.get_by_id(rel_id))
2165 .map(|rel| OpcPackage::resolve_rel_target(&self.doc_part_name, &rel.target))
2166 };
2167 if let Some(part_name) = part_name {
2168 let xml = if is_header {
2169 hf.to_xml_header()
2170 } else {
2171 hf.to_xml_footer()
2172 };
2173 if let Ok(xml) = xml {
2174 self.package.set_part(&part_name, xml);
2175 }
2176 }
2177 }
2178
2179 pub fn headings(&self) -> Vec<(u32, String)> {
2185 let mut result = Vec::new();
2186 for content in &self.document.body.content {
2187 if let BodyContent::Paragraph(p) = content
2188 && let Some(level) = Self::detect_heading_level_for_toc(p)
2189 {
2190 result.push((level, p.text()));
2191 }
2192 }
2193 result
2194 }
2195
2196 pub fn document_outline(&self) -> Vec<OutlineNode> {
2201 let headings = self.headings();
2202 build_outline_tree(&headings)
2203 }
2204
2205 pub fn images(&self) -> Vec<ImageInfo> {
2209 let mut result = Vec::new();
2210
2211 for content in &self.document.body.content {
2212 Self::collect_images_from_content(content, &mut result);
2213 }
2214 result
2215 }
2216
2217 fn collect_images_from_content(content: &BodyContent, result: &mut Vec<ImageInfo>) {
2218 match content {
2219 BodyContent::Paragraph(p) => {
2220 for run in &p.runs {
2221 for rc in &run.content {
2222 if let RunContent::Drawing(drawing) = rc {
2223 if let Some(inline) = &drawing.inline {
2224 result.push(ImageInfo {
2225 embed_id: inline.embed_id.clone(),
2226 name: inline.name.clone(),
2227 description: inline.description.clone(),
2228 width_emu: inline.extent_cx.0,
2229 height_emu: inline.extent_cy.0,
2230 is_anchor: false,
2231 });
2232 }
2233 if let Some(anchor) = &drawing.anchor {
2234 result.push(ImageInfo {
2235 embed_id: anchor.embed_id.clone(),
2236 name: anchor.name.clone(),
2237 description: anchor.description.clone(),
2238 width_emu: anchor.extent_cx.0,
2239 height_emu: anchor.extent_cy.0,
2240 is_anchor: true,
2241 });
2242 }
2243 }
2244 }
2245 }
2246 }
2247 BodyContent::Table(tbl) => {
2248 for row in &tbl.rows {
2249 for cell in &row.cells {
2250 for cc in &cell.content {
2251 match cc {
2252 rdocx_oxml::table::CellContent::Paragraph(p) => {
2253 Self::collect_images_from_content(
2254 &BodyContent::Paragraph(p.clone()),
2255 result,
2256 );
2257 }
2258 rdocx_oxml::table::CellContent::Table(nested) => {
2259 Self::collect_images_from_content(
2260 &BodyContent::Table(nested.clone()),
2261 result,
2262 );
2263 }
2264 }
2265 }
2266 }
2267 }
2268 }
2269 BodyContent::RawXml(_) => {}
2270 }
2271 }
2272
2273 pub fn links(&self) -> Vec<LinkInfo> {
2277 use rdocx_opc::relationship::rel_types;
2278
2279 let mut url_map = std::collections::HashMap::new();
2281 if let Some(rels) = self.package.get_part_rels(&self.doc_part_name) {
2282 for rel in &rels.items {
2283 if rel.rel_type == rel_types::HYPERLINK
2284 && rel.target_mode.as_ref().is_some_and(|m| m == "External")
2285 {
2286 url_map.insert(rel.id.clone(), rel.target.clone());
2287 }
2288 }
2289 }
2290
2291 let mut result = Vec::new();
2292 for content in &self.document.body.content {
2293 if let BodyContent::Paragraph(p) = content {
2294 for hl in &p.hyperlinks {
2295 let text: String = p.runs[hl.run_start..hl.run_end]
2296 .iter()
2297 .map(|r| r.text())
2298 .collect::<Vec<_>>()
2299 .join("");
2300
2301 let url = hl.rel_id.as_ref().and_then(|id| url_map.get(id)).cloned();
2302
2303 result.push(LinkInfo {
2304 text,
2305 url,
2306 anchor: hl.anchor.clone(),
2307 rel_id: hl.rel_id.clone(),
2308 });
2309 }
2310 }
2311 }
2312 result
2313 }
2314
2315 pub fn word_count(&self) -> usize {
2320 let mut count = 0;
2321 for content in &self.document.body.content {
2322 count += Self::word_count_in_content(content);
2323 }
2324 count
2325 }
2326
2327 fn word_count_in_content(content: &BodyContent) -> usize {
2328 match content {
2329 BodyContent::Paragraph(p) => p.text().split_whitespace().count(),
2330 BodyContent::Table(tbl) => {
2331 let mut count = 0;
2332 for row in &tbl.rows {
2333 for cell in &row.cells {
2334 for cc in &cell.content {
2335 match cc {
2336 rdocx_oxml::table::CellContent::Paragraph(p) => {
2337 count += p.text().split_whitespace().count();
2338 }
2339 rdocx_oxml::table::CellContent::Table(nested) => {
2340 count += Self::word_count_in_content(&BodyContent::Table(
2341 nested.clone(),
2342 ));
2343 }
2344 }
2345 }
2346 }
2347 }
2348 count
2349 }
2350 BodyContent::RawXml(_) => 0,
2351 }
2352 }
2353
2354 pub fn audit_accessibility(&self) -> Vec<AccessibilityIssue> {
2359 let mut issues = Vec::new();
2360
2361 if self.title().is_none() {
2363 issues.push(AccessibilityIssue {
2364 severity: IssueSeverity::Warning,
2365 message: "Document has no title".to_string(),
2366 });
2367 }
2368
2369 if self.author().is_none() {
2371 issues.push(AccessibilityIssue {
2372 severity: IssueSeverity::Info,
2373 message: "Document has no author".to_string(),
2374 });
2375 }
2376
2377 let images = self.images();
2379 for img in &images {
2380 let has_alt = img
2381 .description
2382 .as_ref()
2383 .is_some_and(|d| !d.is_empty() && d != "Background");
2384 if !has_alt {
2385 let name = img
2386 .name
2387 .as_deref()
2388 .or(Some(&img.embed_id))
2389 .unwrap_or("unknown");
2390 issues.push(AccessibilityIssue {
2391 severity: IssueSeverity::Error,
2392 message: format!("Image \"{name}\" has no alt text"),
2393 });
2394 }
2395 }
2396
2397 let headings = self.headings();
2399 let mut prev_level: Option<u32> = None;
2400 for (level, text) in &headings {
2401 if let Some(prev) = prev_level
2402 && *level > prev + 1
2403 {
2404 issues.push(AccessibilityIssue {
2405 severity: IssueSeverity::Warning,
2406 message: format!(
2407 "Heading level gap: h{prev} -> h{level} (\"{}\")",
2408 truncate_str(text, 40)
2409 ),
2410 });
2411 }
2412 prev_level = Some(*level);
2413 }
2414
2415 let mut consecutive_empty = 0u32;
2417 for content in &self.document.body.content {
2418 if let BodyContent::Paragraph(p) = content {
2419 if p.text().trim().is_empty() {
2420 consecutive_empty += 1;
2421 if consecutive_empty >= 3 {
2422 issues.push(AccessibilityIssue {
2423 severity: IssueSeverity::Info,
2424 message: format!(
2425 "{consecutive_empty} consecutive empty paragraphs (consider using spacing instead)"
2426 ),
2427 });
2428 }
2429 } else {
2430 consecutive_empty = 0;
2431 }
2432 } else {
2433 consecutive_empty = 0;
2434 }
2435 }
2436
2437 issues
2438 }
2439}
2440
2441impl Default for Document {
2442 fn default() -> Self {
2443 Self::new()
2444 }
2445}
2446
/// Guesses an image MIME type from the extension of `part_name`.
///
/// The extension is the text after the last `.`, compared
/// case-insensitively. Unknown or missing extensions fall back to
/// `image/png`.
fn guess_image_content_type(part_name: &str) -> String {
    const KNOWN_TYPES: [(&str, &str); 7] = [
        ("png", "image/png"),
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("gif", "image/gif"),
        ("bmp", "image/bmp"),
        ("tiff", "image/tiff"),
        ("tif", "image/tiff"),
    ];

    let extension = part_name.rsplit('.').next().unwrap_or("").to_lowercase();
    let mime = KNOWN_TYPES
        .iter()
        .find(|entry| entry.0 == extension)
        .map_or("image/png", |entry| entry.1);
    mime.to_string()
}
2460
/// One heading in the document outline, together with the headings nested
/// beneath it.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct OutlineNode {
    /// Heading level of this node.
    pub level: u32,
    /// Text of the heading.
    pub text: String,
    /// Deeper-level headings that follow this heading, in document order.
    pub children: Vec<OutlineNode>,
}
2471
/// Summary of one image (drawing) found in the document body.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ImageInfo {
    /// Embed id recorded on the drawing.
    pub embed_id: String,
    /// Name recorded on the drawing, if any.
    pub name: Option<String>,
    /// Description (alt text) recorded on the drawing, if any.
    pub description: Option<String>,
    /// Horizontal extent of the drawing, in EMU.
    pub width_emu: i64,
    /// Vertical extent of the drawing, in EMU.
    pub height_emu: i64,
    /// `true` for an anchored drawing, `false` for an inline one.
    pub is_anchor: bool,
}
2488
/// Summary of one hyperlink found in a body paragraph.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct LinkInfo {
    /// Concatenated text of the runs covered by the hyperlink.
    pub text: String,
    /// Target URL, when the hyperlink resolves to an external relationship.
    pub url: Option<String>,
    /// Anchor (bookmark name) of the hyperlink, if any.
    pub anchor: Option<String>,
    /// Relationship id recorded on the hyperlink, if any.
    pub rel_id: Option<String>,
}
2501
/// Severity of an accessibility finding.
///
/// Variants are declared from least to most severe, so the derived
/// ordering gives `Info < Warning < Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum IssueSeverity {
    /// Informational note; no action strictly required.
    Info,
    /// Something that should probably be fixed.
    Warning,
    /// A problem that should be fixed.
    Error,
}
2512
2513#[derive(Debug, Clone, PartialEq)]
2515pub struct AccessibilityIssue {
2516 pub severity: IssueSeverity,
2518 pub message: String,
2520}
2521
2522fn build_outline_tree(headings: &[(u32, String)]) -> Vec<OutlineNode> {
2524 let mut root: Vec<OutlineNode> = Vec::new();
2525 let mut stack: Vec<(u32, usize)> = Vec::new(); for (level, text) in headings {
2528 let node = OutlineNode {
2529 level: *level,
2530 text: text.clone(),
2531 children: Vec::new(),
2532 };
2533
2534 while let Some(&(stack_level, _)) = stack.last() {
2536 if stack_level >= *level {
2537 stack.pop();
2538 } else {
2539 break;
2540 }
2541 }
2542
2543 if stack.is_empty() {
2544 root.push(node);
2545 let idx = root.len() - 1;
2546 stack.push((*level, idx));
2547 } else {
2548 let target = get_outline_parent_mut(&mut root, &stack);
2550 target.children.push(node);
2551 let idx = target.children.len() - 1;
2552 stack.push((*level, idx));
2553 }
2554 }
2555
2556 root
2557}
2558
2559fn get_outline_parent_mut<'a>(
2561 root: &'a mut [OutlineNode],
2562 stack: &[(u32, usize)],
2563) -> &'a mut OutlineNode {
2564 let mut current = &mut root[stack[0].1];
2565 for &(_, idx) in &stack[1..] {
2566 current = &mut current.children[idx];
2567 }
2568 current
2569}
2570
/// Shortens `s` to at most `max_len` characters for display.
///
/// Strings of `max_len` characters or fewer are returned unchanged.
/// Longer strings are cut to `max_len - 3` characters (saturating at zero)
/// and `...` is appended. Lengths are measured in characters, not bytes,
/// so short non-ASCII strings are never truncated by mistake.
fn truncate_str(s: &str, max_len: usize) -> String {
    // `nth(max_len)` is `None` exactly when `s` has at most `max_len`
    // characters, and stops scanning as soon as the answer is known.
    if s.chars().nth(max_len).is_none() {
        return s.to_string();
    }

    let kept: String = s.chars().take(max_len.saturating_sub(3)).collect();
    format!("{kept}...")
}
2580
/// Reverses the obfuscation of an embedded `.odttf` font.
///
/// The key comes from the GUID in `file_name`: the text before the first
/// `.` is taken, surrounding braces are trimmed, and the remaining hex
/// digits must form exactly 16 bytes. The first 32 bytes of `data` are
/// XORed with a 16-byte key (used twice) built from those bytes with the
/// first three GUID groups byte-swapped; the rest of the data is copied
/// unchanged.
///
/// Returns `None` when `data` is shorter than 32 bytes or the file name
/// does not contain exactly 32 hex digits.
///
/// NOTE(review): other implementations of this scheme that I am aware of
/// use the GUID bytes in fully reversed order as the key. Confirm this key
/// order against a real obfuscated font.
fn deobfuscate_odttf(data: &[u8], file_name: &str) -> Option<Vec<u8>> {
    /// Number of leading bytes that are obfuscated.
    const OBFUSCATED_LEN: usize = 32;
    /// For each key position, the index of the GUID byte to use.
    const KEY_ORDER: [usize; 16] = [3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15];

    if data.len() < OBFUSCATED_LEN {
        return None;
    }

    let stem = file_name.split('.').next().unwrap_or("");
    let stem = stem.trim_start_matches('{').trim_end_matches('}');

    // Keep only the hex digits (this drops the dashes of the GUID).
    let digits: Vec<u32> = stem.chars().filter_map(|c| c.to_digit(16)).collect();
    if digits.len() != 32 {
        return None;
    }

    let mut guid = [0u8; 16];
    for (slot, pair) in guid.iter_mut().zip(digits.chunks_exact(2)) {
        *slot = (pair[0] * 16 + pair[1]) as u8;
    }

    let mut deobfuscated = data.to_vec();
    let key_bytes = KEY_ORDER.iter().cycle().map(|&source| guid[source]);
    for (byte, key_byte) in deobfuscated.iter_mut().take(OBFUSCATED_LEN).zip(key_bytes) {
        *byte ^= key_byte;
    }

    Some(deobfuscated)
}
2626
#[cfg(test)]
mod tests {
    use super::*;
    use crate::paragraph::Alignment;
    use rdocx_oxml::units::{HalfPoint, Twips};

    /// Returns the bytes of a minimal 1x1 PNG used as an image fixture.
    fn tiny_png() -> Vec<u8> {
        vec![
            // PNG signature
            0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a,
            // IHDR chunk: 13 data bytes, width 1, height 1, bit depth 8, colour type 2
            0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52,
            0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
            0x08, 0x02, 0x00, 0x00, 0x00,
            0x90, 0x77, 0x53, 0xde,
            // IDAT chunk: 12 data bytes followed by the CRC
            0x00, 0x00, 0x00, 0x0c, 0x49, 0x44, 0x41, 0x54,
            0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01,
            0xe2, 0x21, 0xbc, 0x33,
            // IEND chunk
            0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82,
        ]
    }

    /// Asserts that paragraph `i` of `doc` has the text `expected[i]`, in order.
    ///
    /// Only the first `expected.len()` paragraphs are inspected; later ones are
    /// ignored, and a missing paragraph panics on indexing.
    fn assert_leading_paragraph_texts(doc: &Document, expected: &[&str]) {
        let paragraphs = doc.paragraphs();
        for (index, text) in expected.iter().enumerate() {
            assert_eq!(paragraphs[index].text(), *text);
        }
    }

    /// A new document has no paragraphs but already has section properties.
    #[test]
    fn create_new_document() {
        let fresh = Document::new();
        assert_eq!(fresh.paragraph_count(), 0);
        assert!(fresh.section_properties().is_some());
    }

    /// Appended paragraphs are counted and returned in insertion order.
    #[test]
    fn add_paragraphs() {
        let mut document = Document::new();
        for text in ["First paragraph", "Second paragraph"] {
            document.add_paragraph(text);
        }
        assert_eq!(document.paragraph_count(), 2);

        assert_leading_paragraph_texts(&document, &["First paragraph", "Second paragraph"]);
    }

    /// Alignment set through the paragraph builder is readable afterwards.
    #[test]
    fn paragraph_formatting() {
        let mut document = Document::new();
        document
            .add_paragraph("Centered")
            .alignment(Alignment::Center);

        let first_alignment = document.paragraphs()[0].alignment();
        assert_eq!(first_alignment, Some(Alignment::Center));
    }

    /// Bold and size set on a run are reported by the run accessors.
    #[test]
    fn run_formatting() {
        let mut document = Document::new();
        let mut paragraph = document.add_paragraph("");
        paragraph.add_run("Bold text").bold(true).size(14.0);

        let paragraphs = document.paragraphs();
        let first_paragraph_runs: Vec<_> = paragraphs[0].runs().collect();
        assert!(first_paragraph_runs[0].is_bold());
        assert_eq!(first_paragraph_runs[0].size(), Some(14.0));
    }

    /// Serializing to bytes and re-opening preserves text and alignment.
    #[test]
    fn round_trip_in_memory() {
        let mut original = Document::new();
        original.add_paragraph("Hello, World!");
        original
            .add_paragraph("Second paragraph")
            .alignment(Alignment::Center);

        let serialized = original.to_bytes().unwrap();
        let reloaded = Document::from_bytes(&serialized).unwrap();

        assert_eq!(reloaded.paragraph_count(), 2);
        assert_leading_paragraph_texts(&reloaded, &["Hello, World!", "Second paragraph"]);
        assert_eq!(
            reloaded.paragraphs()[1].alignment(),
            Some(Alignment::Center)
        );
    }

    /// The default style sheet of a new document includes `Normal` and `Heading1`.
    #[test]
    fn styles_present() {
        let document = Document::new();
        for style_id in ["Normal", "Heading1"] {
            assert!(document.style(style_id).is_some());
        }
    }

    /// A style applied through the paragraph builder is reported by `style_id`.
    #[test]
    fn paragraph_with_style() {
        let mut document = Document::new();
        document.add_paragraph("Title").style("Heading1");

        let paragraphs = document.paragraphs();
        let applied_style = paragraphs[0].style_id();
        assert_eq!(applied_style, Some("Heading1"));
    }

    /// Several runs in one paragraph keep their own formatting and concatenate as text.
    #[test]
    fn multiple_runs_in_paragraph() {
        let mut document = Document::new();
        let mut paragraph = document.add_paragraph("");
        paragraph.add_run("Normal ");
        paragraph.add_run("bold ").bold(true);
        paragraph.add_run("italic").italic(true);

        let paragraphs = document.paragraphs();
        let first = &paragraphs[0];
        assert_eq!(first.text(), "Normal bold italic");

        let collected_runs: Vec<_> = first.runs().collect();
        assert_eq!(collected_runs.len(), 3);
        assert!(!collected_runs[0].is_bold());
        assert!(collected_runs[1].is_bold());
        assert!(collected_runs[2].is_italic());
    }

    /// A style registered via `StyleBuilder` can be looked up with its name and base style.
    #[test]
    fn add_custom_style() {
        let mut document = Document::new();
        let builder = StyleBuilder::paragraph("MyCustom", "My Custom Style").based_on("Normal");
        document.add_style(builder);
        assert!(document.style("MyCustom").is_some());

        let custom = document.style("MyCustom").unwrap();
        assert_eq!(custom.name(), Some("My Custom Style"));
        assert_eq!(custom.based_on(), Some("Normal"));
    }

    /// Resolved paragraph properties reflect `Heading1` and the no-style default.
    #[test]
    fn resolve_style_properties() {
        let document = Document::new();

        let heading_props = document.resolve_paragraph_properties(Some("Heading1"));
        assert_eq!(heading_props.keep_next, Some(true));
        assert_eq!(heading_props.space_before, Some(Twips(240)));

        let unstyled_props = document.resolve_paragraph_properties(None);
        assert_eq!(unstyled_props.space_after, Some(Twips(160)));
    }

    /// Resolved run properties for `Heading1` are bold, 32 half-points, Calibri.
    #[test]
    fn resolve_run_style_properties() {
        let document = Document::new();
        let heading_run = document.resolve_run_properties(Some("Heading1"), None);
        assert_eq!(heading_run.bold, Some(true));
        assert_eq!(heading_run.sz, Some(HalfPoint(32)));
        assert_eq!(heading_run.font_ascii, Some(String::from("Calibri")));
    }

    /// Switching to landscape records the orientation and makes the page wider than tall.
    #[test]
    fn set_landscape() {
        let mut document = Document::new();
        document.set_landscape();

        let section = document.section_properties().unwrap();
        assert_eq!(section.orientation, Some(ST_PageOrientation::Landscape));

        let width = section.page_width.unwrap().0;
        let height = section.page_height.unwrap().0;
        assert!(width > height);
    }

    /// Margins given in inches are stored in twips (0.5 in -> 720, 0.75 in -> 1080).
    #[test]
    fn set_margins() {
        let mut document = Document::new();
        document.set_margins(
            Length::inches(0.5),
            Length::inches(0.75),
            Length::inches(0.5),
            Length::inches(0.75),
        );

        let section = document.section_properties().unwrap();
        assert_eq!(section.margin_top, Some(Twips(720)));
        assert_eq!(section.margin_right, Some(Twips(1080)));
    }

    /// Column settings store the count, the spacing in twips, and equal-width columns.
    #[test]
    fn set_columns() {
        let mut document = Document::new();
        document.set_columns(2, Length::inches(0.5));

        let section = document.section_properties().unwrap();
        let columns = section.columns.as_ref().unwrap();
        assert_eq!(columns.num, Some(2));
        assert_eq!(columns.space, Some(Twips(720)));
        assert_eq!(columns.equal_width, Some(true));
    }

    /// A 21 cm x 29.7 cm page lands within 5 twips of 11906 x 16838.
    #[test]
    fn set_page_size() {
        let mut document = Document::new();
        document.set_page_size(Length::cm(21.0), Length::cm(29.7));

        let section = document.section_properties().unwrap();
        let width = section.page_width.unwrap().0;
        let height = section.page_height.unwrap().0;
        assert!((width - 11906).abs() < 5);
        assert!((height - 16838).abs() < 5);
    }

    /// Enabling a different first page sets `title_pg` on the section.
    #[test]
    fn set_different_first_page() {
        let mut document = Document::new();
        document.set_different_first_page(true);

        let section = document.section_properties().unwrap();
        assert_eq!(section.title_pg, Some(true));
    }

    /// Paragraphs can be inserted in the middle and at the front of the body.
    #[test]
    fn content_insertion_api() {
        let mut document = Document::new();
        document.add_paragraph("First");
        document.add_paragraph("Third");

        document.insert_paragraph(1, "Second");
        assert_eq!(document.content_count(), 3);
        assert_leading_paragraph_texts(&document, &["First", "Second", "Third"]);

        document.insert_paragraph(0, "Zeroth");
        assert_eq!(document.content_count(), 4);
        assert_leading_paragraph_texts(&document, &["Zeroth"]);
    }

    /// Content can be located by text and removed; out-of-range removal returns `false`.
    #[test]
    fn find_content_index_and_remove() {
        let mut document = Document::new();
        for text in ["Hello", "{{PLACEHOLDER}}", "World"] {
            document.add_paragraph(text);
        }

        assert_eq!(document.find_content_index("{{PLACEHOLDER}}"), Some(1));
        assert_eq!(document.find_content_index("NONEXISTENT"), None);

        let removed = document.remove_content(1);
        assert!(removed);
        assert_eq!(document.content_count(), 2);
        assert_eq!(document.paragraphs()[1].text(), "World");

        let removed_out_of_range = document.remove_content(10);
        assert!(!removed_out_of_range);
    }

    /// Inserting a table between two paragraphs adds content without disturbing them.
    #[test]
    fn insert_table_at_index() {
        let mut document = Document::new();
        document.add_paragraph("Before");
        document.add_paragraph("After");

        document.insert_table(1, 2, 3);
        assert_eq!(document.content_count(), 3);
        assert_eq!(document.table_count(), 1);
        assert_leading_paragraph_texts(&document, &["Before", "After"]);
    }

    /// Placeholder replacement in the body reports one hit per placeholder.
    #[test]
    fn replace_text_in_body() {
        let mut document = Document::new();
        document.add_paragraph("Hello {{name}}!");
        document.add_paragraph("Welcome to {{company}}.");

        let name_hits = document.replace_text("{{name}}", "Alice");
        assert_eq!(name_hits, 1);
        assert_eq!(document.paragraphs()[0].text(), "Hello Alice!");

        let company_hits = document.replace_text("{{company}}", "Acme");
        assert_eq!(company_hits, 1);
        assert_eq!(document.paragraphs()[1].text(), "Welcome to Acme.");
    }

    /// Replacement also reaches header and footer text, counting all three hits.
    #[test]
    fn replace_text_in_header_and_footer() {
        let mut document = Document::new();
        document.set_header("Header: {{title}}");
        document.set_footer("Footer: {{title}}");
        document.add_paragraph("Body: {{title}}");

        let hits = document.replace_text("{{title}}", "My Doc");
        assert_eq!(hits, 3);

        assert_eq!(document.paragraphs()[0].text(), "Body: My Doc");
        assert_eq!(document.header_text().unwrap(), "Header: My Doc");
        assert_eq!(document.footer_text().unwrap(), "Footer: My Doc");
    }

    /// `replace_all` applies every entry of the map and returns the total hit count.
    #[test]
    fn replace_all_batch() {
        let mut document = Document::new();
        document.add_paragraph("{{a}} and {{b}}");

        let replacements = std::collections::HashMap::from([("{{a}}", "X"), ("{{b}}", "Y")]);
        let hits = document.replace_all(&replacements);
        assert_eq!(hits, 2);
        assert_eq!(document.paragraphs()[0].text(), "X and Y");
    }

    /// Filled-in template text survives serialization and re-opening.
    #[test]
    fn template_workflow_round_trip() {
        let mut template = Document::new();
        template.add_paragraph("Company: {{company}}");
        template.add_paragraph("Date: {{date}}");

        template.replace_text("{{company}}", "Acme Corp");
        template.replace_text("{{date}}", "2026-02-22");

        let serialized = template.to_bytes().unwrap();
        let reloaded = Document::from_bytes(&serialized).unwrap();
        assert_leading_paragraph_texts(&reloaded, &["Company: Acme Corp", "Date: 2026-02-22"]);
    }

    /// A background image adds one content item and the document still round-trips.
    #[test]
    fn add_background_image_round_trip() {
        let png_data = tiny_png();

        let mut document = Document::new();
        document.add_paragraph("Hello World");
        document.add_background_image(&png_data, "bg.png");

        assert_eq!(document.content_count(), 2);

        let serialized = document.to_bytes().unwrap();
        let reloaded = Document::from_bytes(&serialized).unwrap();

        assert_eq!(reloaded.content_count(), 2);
        let reloaded_paragraphs = reloaded.paragraphs();
        assert_eq!(reloaded_paragraphs.last().unwrap().text(), "Hello World");
    }

    /// An anchored image adds one content item next to the existing paragraph.
    #[test]
    fn add_anchored_image() {
        let png_data = tiny_png();

        let mut document = Document::new();
        document.add_paragraph("Content");
        document.add_anchored_image(
            &png_data,
            "overlay.png",
            Length::inches(4.0),
            Length::inches(3.0),
            false,
        );
        assert_eq!(document.content_count(), 2);
    }

    /// Inserting a TOC at index 0 adds a title plus one tab-terminated entry per heading.
    #[test]
    fn insert_toc_basic() {
        let mut document = Document::new();
        document.add_paragraph("Introduction");
        document.add_paragraph("Chapter 1").style("Heading1");
        document.add_paragraph("Some text in chapter 1.");
        document.add_paragraph("Section 1.1").style("Heading2");
        document.add_paragraph("Text in section 1.1.");
        document.add_paragraph("Chapter 2").style("Heading1");
        document.add_paragraph("Text in chapter 2.");

        assert_eq!(document.content_count(), 7);

        document.insert_toc(0, 2);

        // Four items are added: the title and three heading entries.
        assert_eq!(document.content_count(), 11);
        assert_leading_paragraph_texts(
            &document,
            &[
                "Table of Contents",
                "Chapter 1\t",
                "Section 1.1\t",
                "Chapter 2\t",
            ],
        );

        let serialized = document.to_bytes().expect("should serialize");
        let reloaded = Document::from_bytes(&serialized).expect("should open");
        assert_eq!(reloaded.content_count(), 11);
        assert_leading_paragraph_texts(&reloaded, &["Table of Contents"]);
    }

    /// Appending a document adds its paragraphs after the existing ones.
    #[test]
    fn append_documents() {
        let mut target = Document::new();
        target.add_paragraph("Paragraph A1");
        target.add_paragraph("Paragraph A2");

        let mut source = Document::new();
        source.add_paragraph("Paragraph B1");
        source.add_paragraph("Paragraph B2");
        source.add_paragraph("Paragraph B3");

        assert_eq!(target.content_count(), 2);
        target.append(&source);
        assert_eq!(target.content_count(), 5);

        assert_leading_paragraph_texts(
            &target,
            &[
                "Paragraph A1",
                "Paragraph A2",
                "Paragraph B1",
                "Paragraph B2",
                "Paragraph B3",
            ],
        );

        let serialized = target.to_bytes().expect("serialize");
        let reopened = Document::from_bytes(&serialized).expect("open");
        assert_eq!(reopened.content_count(), 5);
    }

    /// Appending with a continuous section break yields three content items.
    #[test]
    fn append_with_section_break() {
        let mut target = Document::new();
        target.add_paragraph("A1");

        let mut source = Document::new();
        source.add_paragraph("B1");

        target.append_with_break(&source, crate::SectionBreak::Continuous);
        // One more than the two paragraphs; presumably the extra item carries the break.
        assert_eq!(target.content_count(), 3);
    }

    /// Inserting a document at an index places its paragraphs between the existing ones.
    #[test]
    fn insert_document_at_index() {
        let mut target = Document::new();
        target.add_paragraph("First");
        target.add_paragraph("Last");

        let mut source = Document::new();
        source.add_paragraph("Middle 1");
        source.add_paragraph("Middle 2");

        target.insert_document(1, &source);
        assert_eq!(target.content_count(), 4);
        assert_leading_paragraph_texts(&target, &["First", "Middle 1", "Middle 2", "Last"]);
    }

    /// Merging adds only the one style the target does not already have.
    #[test]
    fn merge_deduplicates_styles() {
        let mut target = Document::new();
        target.add_paragraph("A").style("Heading1");

        let mut source = Document::new();
        source.add_paragraph("B").style("Heading1");
        source.add_style(StyleBuilder::paragraph("CustomB", "Custom B").based_on("Normal"));
        source.add_paragraph("C").style("CustomB");

        let count_before = target.styles.styles.len();
        target.append(&source);
        let count_after = target.styles.styles.len();

        assert_eq!(count_after, count_before + 1);
    }

    /// Headings are listed flat with their level and nested by level in the outline.
    #[test]
    fn headings_and_outline() {
        let mut document = Document::new();
        document.add_paragraph("Intro");
        document.add_paragraph("Chapter 1").style("Heading1");
        document.add_paragraph("Section 1.1").style("Heading2");
        document.add_paragraph("Section 1.2").style("Heading2");
        document.add_paragraph("Chapter 2").style("Heading1");
        document.add_paragraph("Section 2.1").style("Heading2");
        document.add_paragraph("Sub 2.1.1").style("Heading3");

        let flat = document.headings();
        assert_eq!(flat.len(), 6);
        assert_eq!(flat[0], (1, String::from("Chapter 1")));
        assert_eq!(flat[1], (2, String::from("Section 1.1")));
        assert_eq!(flat[5], (3, String::from("Sub 2.1.1")));

        let outline = document.document_outline();
        // Two top-level chapters.
        assert_eq!(outline.len(), 2);
        assert_eq!(outline[0].text, "Chapter 1");
        // Chapter 1 has two sections.
        assert_eq!(outline[0].children.len(), 2);
        assert_eq!(outline[1].text, "Chapter 2");
        // Chapter 2 has one section, which has one sub-section.
        assert_eq!(outline[1].children.len(), 1);
        assert_eq!(outline[1].children[0].children.len(), 1);
    }

    /// Word count sums the words of all paragraphs.
    #[test]
    fn word_count_basic() {
        let mut document = Document::new();
        document.add_paragraph("Hello world");
        document.add_paragraph("Three more words");
        assert_eq!(document.word_count(), 5);
    }

    /// An empty document is flagged for missing title and missing author.
    #[test]
    fn audit_accessibility_missing_metadata() {
        let document = Document::new();
        let issues = document.audit_accessibility();
        for needle in ["no title", "no author"] {
            assert!(issues.iter().any(|issue| issue.message.contains(needle)));
        }
    }

    /// Jumping from `Heading1` straight to `Heading3` is reported as a level gap.
    #[test]
    fn audit_heading_level_gap() {
        let mut document = Document::new();
        document.set_title("Test");
        document.set_author("Test");
        document.add_paragraph("Ch 1").style("Heading1");
        document.add_paragraph("Skip to 3").style("Heading3");

        let issues = document.audit_accessibility();
        let gap_reported = issues
            .iter()
            .any(|issue| issue.message.contains("Heading level gap"));
        assert!(gap_reported);
    }

    /// A text-only document reports no hyperlinks.
    #[test]
    fn links_returns_empty_for_no_hyperlinks() {
        let mut document = Document::new();
        document.add_paragraph("No links here.");
        assert!(document.links().is_empty());
    }

    /// A text-only document reports no images.
    #[test]
    fn images_returns_empty_for_text_only() {
        let mut document = Document::new();
        document.add_paragraph("Just text.");
        assert!(document.images().is_empty());
    }
}