1use crate::error::{Error, Result};
13use crate::ext;
14use crate::generated as types;
15use crate::generated_serializers::ToXml;
16use ooxml_opc::{Package, PackageWriter, Relationships, rel_type, rels_path_for};
17use ooxml_xml::{PositionedNode, RawXmlElement, RawXmlNode};
18use quick_xml::Reader;
19use quick_xml::events::Event;
20use std::fs::File;
21use std::io::{BufReader, Read, Seek};
22use std::path::Path;
23
/// A WordprocessingML document loaded from an OPC package.
///
/// Keeps the underlying package alongside the parsed main document part, its
/// styles, its relationships, and the optional core/extended properties.
pub struct Document<R> {
    /// The package the document was read from.
    package: Package<R>,
    /// Parsed main document part.
    gen_doc: types::Document,
    /// Parsed styles part, or the default value when no styles relationship exists.
    gen_styles: types::Styles,
    /// Relationships of the main document part (empty when it has no `.rels` part).
    doc_rels: Relationships,
    /// Path of the main document part, taken from the package-level relationship target.
    doc_path: String,
    /// Path of the part the styles were loaded from, if any.
    styles_path: Option<String>,
    /// Core properties, when the package declares a core-properties relationship.
    core_properties: Option<CoreProperties>,
    /// Extended (app) properties, when the package declares an extended-properties relationship.
    app_properties: Option<AppProperties>,
}
46
47impl Document<BufReader<File>> {
48 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
50 let file = File::open(path)?;
51 let reader = BufReader::new(file);
52 Self::from_reader(reader)
53 }
54}
55
56impl<R: Read + Seek> Document<R> {
57 pub fn from_reader(reader: R) -> Result<Self> {
59 let mut package = Package::open(reader)?;
60
61 let rels = package.read_relationships()?;
63 let doc_rel = rels
64 .get_by_type(rel_type::OFFICE_DOCUMENT)
65 .ok_or_else(|| Error::MissingPart("main document relationship".into()))?;
66
67 let doc_path = doc_rel.target.clone();
68
69 let doc_xml = package.read_part(&doc_path)?;
71 let gen_doc = ext::parse_document(&doc_xml)?;
72
73 let doc_rels_path = rels_path_for(&doc_path);
75 let doc_rels = if package.has_part(&doc_rels_path) {
76 let rels_xml = package.read_part(&doc_rels_path)?;
77 Relationships::parse(&rels_xml[..])?
78 } else {
79 Relationships::new()
80 };
81
82 let (gen_styles, styles_path) = if let Some(styles_rel) = rels.get_by_type(rel_type::STYLES)
86 {
87 let path = styles_rel.target.clone();
88 let styles_xml = package.read_part(&path)?;
89 (ext::parse_styles(&styles_xml)?, Some(path))
90 } else if let Some(styles_rel) = doc_rels.get_by_type(rel_type::STYLES) {
91 let path = resolve_path(&doc_path, &styles_rel.target);
92 let styles_xml = package.read_part(&path)?;
93 (ext::parse_styles(&styles_xml)?, Some(path))
94 } else {
95 (types::Styles::default(), None)
96 };
97
98 let core_properties = if let Some(core_rel) = rels.get_by_type(rel_type::CORE_PROPERTIES) {
100 let core_xml = package.read_part(&core_rel.target)?;
101 Some(parse_core_properties(&core_xml)?)
102 } else {
103 None
104 };
105
106 let app_properties = if let Some(app_rel) = rels.get_by_type(rel_type::EXTENDED_PROPERTIES)
108 {
109 let app_xml = package.read_part(&app_rel.target)?;
110 Some(parse_app_properties(&app_xml)?)
111 } else {
112 None
113 };
114
115 Ok(Self {
116 package,
117 gen_doc,
118 gen_styles,
119 doc_rels,
120 doc_path,
121 styles_path,
122 core_properties,
123 app_properties,
124 })
125 }
126
127 pub fn body(&self) -> &types::Body {
132 self.gen_doc
133 .body
134 .as_deref()
135 .expect("document has no body element")
136 }
137
138 pub fn body_mut(&mut self) -> &mut types::Body {
140 self.gen_doc
141 .body
142 .as_deref_mut()
143 .expect("document has no body element")
144 }
145
146 pub fn gen_doc(&self) -> &types::Document {
148 &self.gen_doc
149 }
150
151 pub fn package(&self) -> &Package<R> {
153 &self.package
154 }
155
156 pub fn package_mut(&mut self) -> &mut Package<R> {
158 &mut self.package
159 }
160
161 pub fn styles(&self) -> &types::Styles {
163 &self.gen_styles
164 }
165
166 pub fn core_properties(&self) -> Option<&CoreProperties> {
170 self.core_properties.as_ref()
171 }
172
173 pub fn app_properties(&self) -> Option<&AppProperties> {
177 self.app_properties.as_ref()
178 }
179
180 pub fn text(&self) -> String {
184 use crate::ext::BodyExt;
185 self.gen_doc
186 .body
187 .as_deref()
188 .map(|b| b.text())
189 .unwrap_or_default()
190 }
191
192 pub fn get_image_data(&mut self, rel_id: &str) -> Result<ImageData> {
197 let rel = self
199 .doc_rels
200 .get(rel_id)
201 .ok_or_else(|| Error::MissingPart(format!("image relationship {}", rel_id)))?;
202
203 let image_path = resolve_path(&self.doc_path, &rel.target);
205
206 let data = self.package.read_part(&image_path)?;
208
209 let content_type = content_type_from_path(&image_path);
211
212 Ok(ImageData { content_type, data })
213 }
214
215 pub fn get_hyperlink_url(&self, rel_id: &str) -> Option<&str> {
219 self.doc_rels.get(rel_id).map(|rel| rel.target.as_str())
220 }
221
222 pub fn doc_relationships(&self) -> &Relationships {
224 &self.doc_rels
225 }
226
227 pub fn get_header(&mut self, rel_id: &str) -> Result<types::HeaderFooter> {
231 let rel = self
232 .doc_rels
233 .get(rel_id)
234 .ok_or_else(|| Error::MissingPart(format!("header relationship {}", rel_id)))?;
235
236 let header_path = resolve_path(&self.doc_path, &rel.target);
237 let header_xml = self.package.read_part(&header_path)?;
238 Ok(ext::parse_hdr_ftr(&header_xml)?)
239 }
240
241 pub fn get_footer(&mut self, rel_id: &str) -> Result<types::HeaderFooter> {
245 let rel = self
246 .doc_rels
247 .get(rel_id)
248 .ok_or_else(|| Error::MissingPart(format!("footer relationship {}", rel_id)))?;
249
250 let footer_path = resolve_path(&self.doc_path, &rel.target);
251 let footer_xml = self.package.read_part(&footer_path)?;
252 Ok(ext::parse_hdr_ftr(&footer_xml)?)
253 }
254
255 pub fn get_footnotes(&mut self) -> Result<types::Footnotes> {
261 let footnotes_rel = self
262 .doc_rels
263 .get_by_type(
264 "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
265 )
266 .ok_or_else(|| Error::MissingPart("footnotes relationship".into()))?;
267
268 let footnotes_path = resolve_path(&self.doc_path, &footnotes_rel.target);
269 let footnotes_xml = self.package.read_part(&footnotes_path)?;
270 Ok(ext::parse_footnotes(&footnotes_xml)?)
271 }
272
273 pub fn get_endnotes(&mut self) -> Result<types::Endnotes> {
279 let endnotes_rel = self
280 .doc_rels
281 .get_by_type(
282 "http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes",
283 )
284 .ok_or_else(|| Error::MissingPart("endnotes relationship".into()))?;
285
286 let endnotes_path = resolve_path(&self.doc_path, &endnotes_rel.target);
287 let endnotes_xml = self.package.read_part(&endnotes_path)?;
288 Ok(ext::parse_endnotes(&endnotes_xml)?)
289 }
290
291 pub fn get_comments(&mut self) -> Result<types::Comments> {
297 let comments_rel = self
298 .doc_rels
299 .get_by_type(
300 "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
301 )
302 .ok_or_else(|| Error::MissingPart("comments relationship".into()))?;
303
304 let comments_path = resolve_path(&self.doc_path, &comments_rel.target);
305 let comments_xml = self.package.read_part(&comments_path)?;
306 Ok(ext::parse_comments(&comments_xml)?)
307 }
308
309 pub fn get_settings(&mut self) -> Result<DocumentSettings> {
315 let settings_rel = self
316 .doc_rels
317 .get_by_type(
318 "http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings",
319 )
320 .ok_or_else(|| Error::MissingPart("settings relationship".into()))?;
321
322 let settings_path = resolve_path(&self.doc_path, &settings_rel.target);
323 let settings_xml = self.package.read_part(&settings_path)?;
324 parse_settings(&settings_xml)
325 }
326
327 #[cfg(feature = "wml-charts")]
340 pub fn get_chart(&mut self, rel_id: &str) -> Result<ooxml_dml::types::ChartSpace> {
341 let rel = self
342 .doc_rels
343 .get(rel_id)
344 .ok_or_else(|| Error::MissingPart(format!("chart relationship {}", rel_id)))?;
345
346 let chart_path = resolve_path(&self.doc_path, &rel.target);
347 let chart_xml = self.package.read_part(&chart_path)?;
348 ext::parse_chart(&chart_xml).map_err(|e| Error::Invalid(e.to_string()))
349 }
350
351 pub fn save<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
356 let file = File::create(path)?;
357 self.write(file)
358 }
359
360 pub fn write<W: std::io::Write + Seek>(&mut self, writer: W) -> Result<()> {
366 let doc_xml = serialize_xml(&self.gen_doc, "w:document")?;
368
369 let mut replacements = std::collections::HashMap::new();
371 replacements.insert(self.doc_path.as_str(), doc_xml.as_slice());
372
373 let styles_xml;
375 if let Some(ref styles_path) = self.styles_path {
376 styles_xml = serialize_xml(&self.gen_styles, "w:styles")?;
377 replacements.insert(styles_path.as_str(), styles_xml.as_slice());
378 }
379
380 let mut pkg_writer = PackageWriter::new(writer);
382 self.package
383 .copy_to_writer(&mut pkg_writer, &replacements)?;
384 pkg_writer.finish()?;
385
386 Ok(())
387 }
388}
389
390pub(crate) fn serialize_xml(value: &impl ToXml, tag: &str) -> Result<Vec<u8>> {
396 let inner = Vec::new();
397 let mut writer = quick_xml::Writer::new(inner);
398 value.write_element(tag, &mut writer)?;
399 let inner = writer.into_inner();
400
401 let mut buf = Vec::with_capacity(
402 b"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n".len() + inner.len(),
403 );
404 buf.extend_from_slice(b"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n");
405 buf.extend_from_slice(&inner);
406 Ok(buf)
407}
408
/// Resolves a relationship target against the part that owns the relationship.
///
/// * A target starting with `/` is package-absolute: the leading slash is
///   removed and the rest is returned unchanged.
/// * Otherwise the target is appended to the directory of `base` (everything
///   before its last `/`). `.` and `..` segments are kept verbatim.
/// * If `base` contains no `/`, the target is returned unchanged.
pub(crate) fn resolve_path(base: &str, relative: &str) -> String {
    match relative.strip_prefix('/') {
        Some(absolute) => absolute.to_owned(),
        None => match base.rsplit_once('/') {
            Some((dir, _file)) => format!("{}/{}", dir, relative),
            None => relative.to_owned(),
        },
    }
}
427
/// Guesses an image MIME type from the file extension of `path`.
///
/// The extension is the text after the last `.` of the final `/`-separated
/// segment, compared case-insensitively. Paths without an extension, and
/// unrecognized extensions, yield `application/octet-stream`.
pub(crate) fn content_type_from_path(path: &str) -> String {
    // Look only at the last segment so a dot in a directory name is never
    // mistaken for an extension separator.
    let file_name = path.rsplit('/').next().unwrap_or(path);
    // `rsplit_once` yields nothing when there is no dot, so a file that is
    // merely *named* like an extension (e.g. "png") is not treated as one.
    let ext = file_name
        .rsplit_once('.')
        .map(|(_, ext)| ext.to_ascii_lowercase())
        .unwrap_or_default();

    match ext.as_str() {
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "bmp" => "image/bmp",
        "tiff" | "tif" => "image/tiff",
        "webp" => "image/webp",
        "svg" => "image/svg+xml",
        "emf" => "image/x-emf",
        "wmf" => "image/x-wmf",
        _ => "application/octet-stream",
    }
    .to_string()
}
445
/// Raw image bytes read from the package, with a guessed content type.
#[derive(Debug, Clone)]
pub struct ImageData {
    /// MIME type derived from the image part's file extension.
    pub content_type: String,
    /// The bytes of the image part.
    pub data: Vec<u8>,
}
458
/// Settings extracted from the document's settings part.
///
/// Only a subset of the settings elements is interpreted; every other child
/// element is kept in `unknown_children`.
#[derive(Debug, Clone, Default)]
pub struct DocumentSettings {
    /// `val` of the `defaultTabStop` element, if present and numeric.
    pub default_tab_stop: Option<u32>,
    /// `percent` of the `zoom` element, if present and numeric.
    pub zoom_percent: Option<u32>,
    /// Toggle read from `displayBackgroundShape`.
    pub display_background_shape: bool,
    /// Toggle read from `trackRevisions`.
    pub track_revisions: bool,
    /// Toggle read from `doNotTrackMoves`.
    pub do_not_track_moves: bool,
    /// Toggle read from `doNotTrackFormatting`.
    pub do_not_track_formatting: bool,
    /// `spelling` attribute of `proofState`, when it is `clean` or `dirty`.
    pub spelling_state: Option<ProofState>,
    /// `grammar` attribute of `proofState`, when it is `clean` or `dirty`.
    pub grammar_state: Option<ProofState>,
    /// `val` of `characterSpacingControl`, when it is a recognized value.
    pub character_spacing_control: Option<CharacterSpacingControl>,
    /// `val` of the `compatSetting` named `compatibilityMode`, if numeric.
    pub compat_mode: Option<u32>,
    /// Uninterpreted child elements, each tagged with its child index.
    pub unknown_children: Vec<PositionedNode>,
}
492
/// Value of the `spelling` / `grammar` attributes of `proofState`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProofState {
    /// Attribute value `clean`.
    Clean,
    /// Attribute value `dirty`.
    Dirty,
}
501
/// Value of the `characterSpacingControl` setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CharacterSpacingControl {
    /// Attribute value `doNotCompress`.
    DoNotCompress,
    /// Attribute value `compressPunctuation`.
    CompressPunctuation,
    /// Attribute value `compressPunctuationAndJapaneseKana`.
    CompressPunctuationAndJapaneseKana,
}
512
/// Core document metadata (the children of `cp:coreProperties`).
///
/// Every field holds the text of the like-named element, or `None` when the
/// element is absent. All values, including dates, are kept as plain strings.
#[derive(Debug, Clone, Default)]
pub struct CoreProperties {
    /// Text of `dc:title`.
    pub title: Option<String>,
    /// Text of `dc:creator`.
    pub creator: Option<String>,
    /// Text of `dc:subject`.
    pub subject: Option<String>,
    /// Text of `dc:description`.
    pub description: Option<String>,
    /// Text of `cp:keywords`.
    pub keywords: Option<String>,
    /// Text of `cp:category`.
    pub category: Option<String>,
    /// Text of `cp:lastModifiedBy`.
    pub last_modified_by: Option<String>,
    /// Text of `cp:revision`.
    pub revision: Option<String>,
    /// Text of `dcterms:created`.
    pub created: Option<String>,
    /// Text of `dcterms:modified`.
    pub modified: Option<String>,
    /// Text of `cp:contentStatus`.
    pub content_status: Option<String>,
}
544
/// Extended (application) metadata (the children of `Properties`).
///
/// Text fields hold the element text; numeric fields hold the element text
/// parsed as `u32`, or `None` when absent or not a valid number.
#[derive(Debug, Clone, Default)]
pub struct AppProperties {
    /// Text of `Application`.
    pub application: Option<String>,
    /// Text of `AppVersion`.
    pub app_version: Option<String>,
    /// Text of `Company`.
    pub company: Option<String>,
    /// Text of `Manager`.
    pub manager: Option<String>,
    /// Numeric value of `TotalTime`.
    pub total_time: Option<u32>,
    /// Numeric value of `Pages`.
    pub pages: Option<u32>,
    /// Numeric value of `Words`.
    pub words: Option<u32>,
    /// Numeric value of `Characters`.
    pub characters: Option<u32>,
    /// Numeric value of `CharactersWithSpaces`.
    pub characters_with_spaces: Option<u32>,
    /// Numeric value of `Paragraphs`.
    pub paragraphs: Option<u32>,
    /// Numeric value of `Lines`.
    pub lines: Option<u32>,
    /// Text of `Template`.
    pub template: Option<String>,
    /// Numeric value of `DocSecurity`.
    pub doc_security: Option<u32>,
}
580
// Local (prefix-stripped) element names recognized by `parse_settings`.

/// Root element of the settings part.
const EL_SETTINGS: &[u8] = b"settings";
/// Element whose `val` attribute fills `DocumentSettings::default_tab_stop`.
const EL_DEFAULT_TAB_STOP: &[u8] = b"defaultTabStop";
/// Element whose `percent` attribute fills `DocumentSettings::zoom_percent`.
const EL_ZOOM: &[u8] = b"zoom";
/// Toggle element for `DocumentSettings::display_background_shape`.
const EL_DISPLAY_BACKGROUND_SHAPE: &[u8] = b"displayBackgroundShape";
/// Toggle element for `DocumentSettings::track_revisions`.
const EL_TRACK_REVISIONS: &[u8] = b"trackRevisions";
/// Toggle element for `DocumentSettings::do_not_track_moves`.
const EL_DO_NOT_TRACK_MOVES: &[u8] = b"doNotTrackMoves";
/// Toggle element for `DocumentSettings::do_not_track_formatting`.
const EL_DO_NOT_TRACK_FORMATTING: &[u8] = b"doNotTrackFormatting";
/// Element carrying the `spelling` and `grammar` proofing states.
const EL_PROOF_STATE: &[u8] = b"proofState";
/// Element whose `val` attribute fills `DocumentSettings::character_spacing_control`.
const EL_CHAR_SPACE_CONTROL: &[u8] = b"characterSpacingControl";
/// Container element for compatibility settings.
const EL_COMPAT: &[u8] = b"compat";
/// Child of `compat`; the one named `compatibilityMode` fills `DocumentSettings::compat_mode`.
const EL_COMPAT_SETTING: &[u8] = b"compatSetting";
597
/// Parses a settings part into [`DocumentSettings`].
///
/// Elements and attributes are matched by local name (namespace prefix
/// ignored). Recognized settings are only interpreted when they appear as
/// empty elements (`<w:zoom .../>`); any other direct child of `settings`
/// outside `compat` is captured verbatim in `unknown_children` together with
/// its child index. Parsing stops at the closing `settings` tag or at end of
/// input.
///
/// # Errors
///
/// Returns an XML error annotated with the context `word/settings.xml` and
/// the reader's error position when the input is not well-formed, and
/// propagates failures from capturing an unknown element.
fn parse_settings(xml: &[u8]) -> Result<DocumentSettings> {
    let mut reader = Reader::from_reader(xml);
    reader.config_mut().trim_text(false);

    let mut buf = Vec::new();
    let mut settings = DocumentSettings::default();
    // True once the root `settings` start tag has been seen.
    let mut in_settings = false;
    // True while inside a non-empty `compat` element.
    let mut in_compat = false;
    // Index of the next direct child of `settings`; recorded with each
    // unknown child so its position among its siblings is known.
    let mut child_idx: usize = 0;

    loop {
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(e)) => {
                let name = e.name();
                let local = local_name(name.as_ref());
                if local == EL_SETTINGS {
                    in_settings = true;
                } else if in_settings && local == EL_COMPAT {
                    // `compat` occupies one child slot; its children are
                    // handled by the `Empty` branch below.
                    in_compat = true;
                    child_idx += 1;
                } else if in_settings && !in_compat {
                    // Any other element with content is kept as raw XML; the
                    // reader is handed over so the whole element is captured.
                    let node = RawXmlElement::from_reader(&mut reader, &e)?;
                    settings
                        .unknown_children
                        .push(PositionedNode::new(child_idx, RawXmlNode::Element(node)));
                    child_idx += 1;
                }
            }
            Ok(Event::Empty(e)) => {
                let name = e.name();
                let local = local_name(name.as_ref());

                // Ignore anything before the root element.
                if !in_settings {
                    continue;
                }

                if local == EL_DEFAULT_TAB_STOP {
                    for attr in e.attributes().flatten() {
                        let key = local_name(attr.key.as_ref());
                        if key == b"val"
                            && let Ok(s) = std::str::from_utf8(&attr.value)
                        {
                            // A non-numeric value leaves the setting as `None`.
                            settings.default_tab_stop = s.parse().ok();
                        }
                    }
                    child_idx += 1;
                } else if local == EL_ZOOM {
                    for attr in e.attributes().flatten() {
                        let key = local_name(attr.key.as_ref());
                        if key == b"percent"
                            && let Ok(s) = std::str::from_utf8(&attr.value)
                        {
                            settings.zoom_percent = s.parse().ok();
                        }
                    }
                    child_idx += 1;
                } else if local == EL_DISPLAY_BACKGROUND_SHAPE {
                    settings.display_background_shape = parse_toggle_val(&e);
                    child_idx += 1;
                } else if local == EL_TRACK_REVISIONS {
                    settings.track_revisions = parse_toggle_val(&e);
                    child_idx += 1;
                } else if local == EL_DO_NOT_TRACK_MOVES {
                    settings.do_not_track_moves = parse_toggle_val(&e);
                    child_idx += 1;
                } else if local == EL_DO_NOT_TRACK_FORMATTING {
                    settings.do_not_track_formatting = parse_toggle_val(&e);
                    child_idx += 1;
                } else if local == EL_PROOF_STATE {
                    for attr in e.attributes().flatten() {
                        let key = local_name(attr.key.as_ref());
                        if let Ok(s) = std::str::from_utf8(&attr.value) {
                            // Values other than `clean` / `dirty` map to `None`.
                            if key == b"spelling" {
                                settings.spelling_state = match s {
                                    "clean" => Some(ProofState::Clean),
                                    "dirty" => Some(ProofState::Dirty),
                                    _ => None,
                                };
                            } else if key == b"grammar" {
                                settings.grammar_state = match s {
                                    "clean" => Some(ProofState::Clean),
                                    "dirty" => Some(ProofState::Dirty),
                                    _ => None,
                                };
                            }
                        }
                    }
                    child_idx += 1;
                } else if local == EL_CHAR_SPACE_CONTROL {
                    for attr in e.attributes().flatten() {
                        let key = local_name(attr.key.as_ref());
                        if key == b"val"
                            && let Ok(s) = std::str::from_utf8(&attr.value)
                        {
                            settings.character_spacing_control = match s {
                                "doNotCompress" => Some(CharacterSpacingControl::DoNotCompress),
                                "compressPunctuation" => {
                                    Some(CharacterSpacingControl::CompressPunctuation)
                                }
                                "compressPunctuationAndJapaneseKana" => Some(
                                    CharacterSpacingControl::CompressPunctuationAndJapaneseKana,
                                ),
                                _ => None,
                            };
                        }
                    }
                    child_idx += 1;
                } else if in_compat && local == EL_COMPAT_SETTING {
                    // Only the compat setting named `compatibilityMode` is
                    // interpreted. It is not a direct child of `settings`,
                    // so `child_idx` is left alone.
                    for attr in e.attributes().flatten() {
                        let key = local_name(attr.key.as_ref());
                        if key == b"name" && &*attr.value == b"compatibilityMode" {
                            // Second pass over the same attributes to pick up
                            // `val` regardless of attribute order.
                            for attr2 in e.attributes().flatten() {
                                let key2 = local_name(attr2.key.as_ref());
                                if key2 == b"val"
                                    && let Ok(s) = std::str::from_utf8(&attr2.value)
                                {
                                    settings.compat_mode = s.parse().ok();
                                }
                            }
                        }
                    }
                } else if !in_compat {
                    // Unrecognized empty child of `settings`: keep it verbatim.
                    let node = RawXmlElement::from_empty(&e);
                    settings
                        .unknown_children
                        .push(PositionedNode::new(child_idx, RawXmlNode::Element(node)));
                    child_idx += 1;
                }
            }
            Ok(Event::End(e)) => {
                let name = e.name();
                let local = local_name(name.as_ref());
                if local == EL_SETTINGS {
                    // Closing root tag: nothing further is read.
                    break;
                } else if local == EL_COMPAT {
                    in_compat = false;
                }
            }
            Ok(Event::Eof) => break,
            Err(e) => {
                return Err(Error::Xml(e)
                    .with_context("word/settings.xml")
                    .at_position(reader.error_position()));
            }
            _ => {}
        }
        buf.clear();
    }

    Ok(settings)
}
756
757fn parse_core_properties(xml: &[u8]) -> Result<CoreProperties> {
762 let mut reader = Reader::from_reader(xml);
763 reader.config_mut().trim_text(false);
764
765 let mut buf = Vec::new();
766 let mut props = CoreProperties::default();
767 let mut in_core = false;
768 let mut current_element: Option<&'static str> = None;
769
770 loop {
771 match reader.read_event_into(&mut buf) {
772 Ok(Event::Start(e)) => {
773 let name = e.name();
774 let local = local_name(name.as_ref());
775 if local == b"coreProperties" {
776 in_core = true;
777 } else if in_core {
778 current_element = match local {
780 b"title" => Some("title"),
781 b"creator" => Some("creator"),
782 b"subject" => Some("subject"),
783 b"description" => Some("description"),
784 b"keywords" => Some("keywords"),
785 b"category" => Some("category"),
786 b"lastModifiedBy" => Some("lastModifiedBy"),
787 b"revision" => Some("revision"),
788 b"created" => Some("created"),
789 b"modified" => Some("modified"),
790 b"contentStatus" => Some("contentStatus"),
791 _ => None,
792 };
793 }
794 }
795 Ok(Event::End(e)) => {
796 let name = e.name();
797 let local = local_name(name.as_ref());
798 if local == b"coreProperties" {
799 in_core = false;
800 } else if in_core {
801 current_element = None;
802 }
803 }
804 Ok(Event::Text(e)) if current_element.is_some() => {
805 let text = e.decode().ok().map(|s| s.into_owned());
806 match current_element {
807 Some("title") => props.title = text,
808 Some("creator") => props.creator = text,
809 Some("subject") => props.subject = text,
810 Some("description") => props.description = text,
811 Some("keywords") => props.keywords = text,
812 Some("category") => props.category = text,
813 Some("lastModifiedBy") => props.last_modified_by = text,
814 Some("revision") => props.revision = text,
815 Some("created") => props.created = text,
816 Some("modified") => props.modified = text,
817 Some("contentStatus") => props.content_status = text,
818 _ => {}
819 }
820 }
821 Ok(Event::Eof) => break,
822 Err(e) => {
823 return Err(Error::Xml(e)
824 .with_context("docProps/core.xml")
825 .at_position(reader.error_position()));
826 }
827 _ => {}
828 }
829 buf.clear();
830 }
831
832 Ok(props)
833}
834
835fn parse_app_properties(xml: &[u8]) -> Result<AppProperties> {
839 let mut reader = Reader::from_reader(xml);
840 reader.config_mut().trim_text(false);
841
842 let mut buf = Vec::new();
843 let mut props = AppProperties::default();
844 let mut in_props = false;
845 let mut current_element: Option<&'static str> = None;
846
847 loop {
848 match reader.read_event_into(&mut buf) {
849 Ok(Event::Start(e)) => {
850 let name = e.name();
851 let local = local_name(name.as_ref());
852 if local == b"Properties" {
853 in_props = true;
854 } else if in_props {
855 current_element = match local {
857 b"Application" => Some("Application"),
858 b"AppVersion" => Some("AppVersion"),
859 b"Company" => Some("Company"),
860 b"Manager" => Some("Manager"),
861 b"TotalTime" => Some("TotalTime"),
862 b"Pages" => Some("Pages"),
863 b"Words" => Some("Words"),
864 b"Characters" => Some("Characters"),
865 b"CharactersWithSpaces" => Some("CharactersWithSpaces"),
866 b"Paragraphs" => Some("Paragraphs"),
867 b"Lines" => Some("Lines"),
868 b"Template" => Some("Template"),
869 b"DocSecurity" => Some("DocSecurity"),
870 _ => None,
871 };
872 }
873 }
874 Ok(Event::End(e)) => {
875 let name = e.name();
876 let local = local_name(name.as_ref());
877 if local == b"Properties" {
878 in_props = false;
879 } else if in_props {
880 current_element = None;
881 }
882 }
883 Ok(Event::Text(e)) if current_element.is_some() => {
884 let text = e.decode().ok().map(|s| s.into_owned());
885 match current_element {
886 Some("Application") => props.application = text,
887 Some("AppVersion") => props.app_version = text,
888 Some("Company") => props.company = text,
889 Some("Manager") => props.manager = text,
890 Some("TotalTime") => {
891 props.total_time = text.as_deref().and_then(|s| s.parse().ok())
892 }
893 Some("Pages") => props.pages = text.as_deref().and_then(|s| s.parse().ok()),
894 Some("Words") => props.words = text.as_deref().and_then(|s| s.parse().ok()),
895 Some("Characters") => {
896 props.characters = text.as_deref().and_then(|s| s.parse().ok())
897 }
898 Some("CharactersWithSpaces") => {
899 props.characters_with_spaces = text.as_deref().and_then(|s| s.parse().ok())
900 }
901 Some("Paragraphs") => {
902 props.paragraphs = text.as_deref().and_then(|s| s.parse().ok())
903 }
904 Some("Lines") => props.lines = text.as_deref().and_then(|s| s.parse().ok()),
905 Some("Template") => props.template = text,
906 Some("DocSecurity") => {
907 props.doc_security = text.as_deref().and_then(|s| s.parse().ok())
908 }
909 _ => {}
910 }
911 }
912 Ok(Event::Eof) => break,
913 Err(e) => {
914 return Err(Error::Xml(e)
915 .with_context("docProps/app.xml")
916 .at_position(reader.error_position()));
917 }
918 _ => {}
919 }
920 buf.clear();
921 }
922
923 Ok(props)
924}
925
/// Strips a namespace prefix from a qualified XML name.
///
/// Returns everything after the first `:`; a name without a colon is returned
/// unchanged.
fn local_name(name: &[u8]) -> &[u8] {
    name.splitn(2, |&byte| byte == b':')
        .nth(1)
        .unwrap_or(name)
}
939
940fn parse_toggle_val(e: &quick_xml::events::BytesStart) -> bool {
946 for attr in e.attributes().filter_map(|a| a.ok()) {
947 if attr.key.as_ref() == b"w:val" || attr.key.as_ref() == b"val" {
948 let val: &[u8] = &attr.value;
949 return matches!(val, b"true" | b"1" | b"on" | b"True" | b"On");
950 }
951 }
952 true
954}
955
956use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event as XmlEvent};
961
962fn write_text_elem(writer: &mut quick_xml::Writer<Vec<u8>>, tag: &str, text: &str) -> Result<()> {
964 writer.write_event(XmlEvent::Start(BytesStart::new(tag)))?;
965 writer.write_event(XmlEvent::Text(BytesText::new(text)))?;
966 writer.write_event(XmlEvent::End(BytesEnd::new(tag)))?;
967 Ok(())
968}
969
970fn write_text_elem_attr(
972 writer: &mut quick_xml::Writer<Vec<u8>>,
973 tag: &str,
974 attr_name: &str,
975 attr_val: &str,
976 text: &str,
977) -> Result<()> {
978 let mut start = BytesStart::new(tag);
979 start.push_attribute((attr_name, attr_val));
980 writer.write_event(XmlEvent::Start(start))?;
981 writer.write_event(XmlEvent::Text(BytesText::new(text)))?;
982 writer.write_event(XmlEvent::End(BytesEnd::new(tag)))?;
983 Ok(())
984}
985
986pub(crate) fn serialize_core_properties(props: &CoreProperties) -> Result<Vec<u8>> {
990 let mut writer = quick_xml::Writer::new(Vec::new());
991
992 writer.write_event(XmlEvent::Decl(BytesDecl::new(
993 "1.0",
994 Some("UTF-8"),
995 Some("yes"),
996 )))?;
997
998 let mut root = BytesStart::new("cp:coreProperties");
999 root.push_attribute((
1000 "xmlns:cp",
1001 "http://schemas.openxmlformats.org/package/2006/metadata/core-properties",
1002 ));
1003 root.push_attribute(("xmlns:dc", "http://purl.org/dc/elements/1.1/"));
1004 root.push_attribute(("xmlns:dcterms", "http://purl.org/dc/terms/"));
1005 root.push_attribute(("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"));
1006 writer.write_event(XmlEvent::Start(root))?;
1007
1008 if let Some(ref v) = props.title {
1009 write_text_elem(&mut writer, "dc:title", v)?;
1010 }
1011 if let Some(ref v) = props.creator {
1012 write_text_elem(&mut writer, "dc:creator", v)?;
1013 }
1014 if let Some(ref v) = props.subject {
1015 write_text_elem(&mut writer, "dc:subject", v)?;
1016 }
1017 if let Some(ref v) = props.description {
1018 write_text_elem(&mut writer, "dc:description", v)?;
1019 }
1020 if let Some(ref v) = props.keywords {
1021 write_text_elem(&mut writer, "cp:keywords", v)?;
1022 }
1023 if let Some(ref v) = props.category {
1024 write_text_elem(&mut writer, "cp:category", v)?;
1025 }
1026 if let Some(ref v) = props.last_modified_by {
1027 write_text_elem(&mut writer, "cp:lastModifiedBy", v)?;
1028 }
1029 if let Some(ref v) = props.revision {
1030 write_text_elem(&mut writer, "cp:revision", v)?;
1031 }
1032 if let Some(ref v) = props.created {
1033 write_text_elem_attr(
1034 &mut writer,
1035 "dcterms:created",
1036 "xsi:type",
1037 "dcterms:W3CDTF",
1038 v,
1039 )?;
1040 }
1041 if let Some(ref v) = props.modified {
1042 write_text_elem_attr(
1043 &mut writer,
1044 "dcterms:modified",
1045 "xsi:type",
1046 "dcterms:W3CDTF",
1047 v,
1048 )?;
1049 }
1050 if let Some(ref v) = props.content_status {
1051 write_text_elem(&mut writer, "cp:contentStatus", v)?;
1052 }
1053
1054 writer.write_event(XmlEvent::End(BytesEnd::new("cp:coreProperties")))?;
1055 Ok(writer.into_inner())
1056}
1057
1058pub(crate) fn serialize_app_properties(props: &AppProperties) -> Result<Vec<u8>> {
1062 let mut writer = quick_xml::Writer::new(Vec::new());
1063
1064 writer.write_event(XmlEvent::Decl(BytesDecl::new(
1065 "1.0",
1066 Some("UTF-8"),
1067 Some("yes"),
1068 )))?;
1069
1070 let mut root = BytesStart::new("Properties");
1071 root.push_attribute((
1072 "xmlns",
1073 "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties",
1074 ));
1075 root.push_attribute((
1076 "xmlns:vt",
1077 "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes",
1078 ));
1079 writer.write_event(XmlEvent::Start(root))?;
1080
1081 if let Some(ref v) = props.application {
1082 write_text_elem(&mut writer, "Application", v)?;
1083 }
1084 if let Some(ref v) = props.app_version {
1085 write_text_elem(&mut writer, "AppVersion", v)?;
1086 }
1087 if let Some(ref v) = props.company {
1088 write_text_elem(&mut writer, "Company", v)?;
1089 }
1090 if let Some(ref v) = props.manager {
1091 write_text_elem(&mut writer, "Manager", v)?;
1092 }
1093 if let Some(v) = props.total_time {
1094 write_text_elem(&mut writer, "TotalTime", &v.to_string())?;
1095 }
1096 if let Some(v) = props.pages {
1097 write_text_elem(&mut writer, "Pages", &v.to_string())?;
1098 }
1099 if let Some(v) = props.words {
1100 write_text_elem(&mut writer, "Words", &v.to_string())?;
1101 }
1102 if let Some(v) = props.characters {
1103 write_text_elem(&mut writer, "Characters", &v.to_string())?;
1104 }
1105 if let Some(v) = props.characters_with_spaces {
1106 write_text_elem(&mut writer, "CharactersWithSpaces", &v.to_string())?;
1107 }
1108 if let Some(v) = props.paragraphs {
1109 write_text_elem(&mut writer, "Paragraphs", &v.to_string())?;
1110 }
1111 if let Some(v) = props.lines {
1112 write_text_elem(&mut writer, "Lines", &v.to_string())?;
1113 }
1114 if let Some(ref v) = props.template {
1115 write_text_elem(&mut writer, "Template", v)?;
1116 }
1117 if let Some(v) = props.doc_security {
1118 write_text_elem(&mut writer, "DocSecurity", &v.to_string())?;
1119 }
1120
1121 writer.write_event(XmlEvent::End(BytesEnd::new("Properties")))?;
1122 Ok(writer.into_inner())
1123}
1124
/// Unit tests for path resolution, content-type detection, and the
/// core/extended property parsers and serializers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Relative targets are joined to the base directory (without
    /// normalizing `..`); absolute targets lose their leading slash.
    #[test]
    fn test_resolve_path() {
        assert_eq!(
            resolve_path("word/document.xml", "media/image1.png"),
            "word/media/image1.png"
        );
        assert_eq!(
            resolve_path("word/document.xml", "../media/image1.png"),
            "word/../media/image1.png"
        );

        assert_eq!(
            resolve_path("word/document.xml", "/word/media/image1.png"),
            "word/media/image1.png"
        );
    }

    /// Known extensions map to image MIME types case-insensitively;
    /// anything else falls back to `application/octet-stream`.
    #[test]
    fn test_content_type_from_path() {
        assert_eq!(content_type_from_path("word/media/image1.png"), "image/png");
        assert_eq!(
            content_type_from_path("word/media/image2.jpg"),
            "image/jpeg"
        );
        assert_eq!(
            content_type_from_path("word/media/image3.JPEG"),
            "image/jpeg"
        );
        assert_eq!(content_type_from_path("word/media/image4.gif"), "image/gif");
        assert_eq!(
            content_type_from_path("word/media/unknown.xyz"),
            "application/octet-stream"
        );
    }

    /// Every supported core property is read from a complete document.
    #[test]
    fn test_parse_core_properties() {
        let xml = br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:dcterms="http://purl.org/dc/terms/"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <dc:title>Test Document Title</dc:title>
  <dc:creator>John Doe</dc:creator>
  <dc:subject>Testing</dc:subject>
  <dc:description>A test document for unit testing.</dc:description>
  <cp:keywords>test, unit, document</cp:keywords>
  <cp:category>Testing</cp:category>
  <cp:lastModifiedBy>Jane Doe</cp:lastModifiedBy>
  <cp:revision>5</cp:revision>
  <dcterms:created xsi:type="dcterms:W3CDTF">2024-01-15T10:30:00Z</dcterms:created>
  <dcterms:modified xsi:type="dcterms:W3CDTF">2024-01-16T14:45:00Z</dcterms:modified>
  <cp:contentStatus>Draft</cp:contentStatus>
</cp:coreProperties>"#;

        let props = parse_core_properties(xml).unwrap();

        assert_eq!(props.title, Some("Test Document Title".to_string()));
        assert_eq!(props.creator, Some("John Doe".to_string()));
        assert_eq!(props.subject, Some("Testing".to_string()));
        assert_eq!(
            props.description,
            Some("A test document for unit testing.".to_string())
        );
        assert_eq!(props.keywords, Some("test, unit, document".to_string()));
        assert_eq!(props.category, Some("Testing".to_string()));
        assert_eq!(props.last_modified_by, Some("Jane Doe".to_string()));
        assert_eq!(props.revision, Some("5".to_string()));
        assert_eq!(props.created, Some("2024-01-15T10:30:00Z".to_string()));
        assert_eq!(props.modified, Some("2024-01-16T14:45:00Z".to_string()));
        assert_eq!(props.content_status, Some("Draft".to_string()));
    }

    /// Serialized core properties contain the expected elements and parse
    /// back to the same values.
    #[test]
    fn test_serialize_core_properties() {
        let props = CoreProperties {
            title: Some("My Doc".to_string()),
            creator: Some("Alice".to_string()),
            created: Some("2024-01-01T00:00:00Z".to_string()),
            modified: Some("2024-01-02T00:00:00Z".to_string()),
            ..Default::default()
        };

        let bytes = serialize_core_properties(&props).unwrap();
        let xml = String::from_utf8(bytes).unwrap();

        assert!(xml.contains("<dc:title>My Doc</dc:title>"));
        assert!(xml.contains("<dc:creator>Alice</dc:creator>"));
        assert!(xml.contains(
            r#"<dcterms:created xsi:type="dcterms:W3CDTF">2024-01-01T00:00:00Z</dcterms:created>"#
        ));
        assert!(xml.contains("cp:coreProperties"));

        // Round trip through the parser.
        let parsed = parse_core_properties(xml.as_bytes()).unwrap();
        assert_eq!(parsed.title, Some("My Doc".to_string()));
        assert_eq!(parsed.creator, Some("Alice".to_string()));
        assert_eq!(parsed.created, Some("2024-01-01T00:00:00Z".to_string()));
    }

    /// Serialized app properties contain the expected elements and parse
    /// back to the same values.
    #[test]
    fn test_serialize_app_properties() {
        let props = AppProperties {
            application: Some("ooxml-wml".to_string()),
            pages: Some(3),
            words: Some(500),
            ..Default::default()
        };

        let bytes = serialize_app_properties(&props).unwrap();
        let xml = String::from_utf8(bytes).unwrap();

        assert!(xml.contains("<Application>ooxml-wml</Application>"));
        assert!(xml.contains("<Pages>3</Pages>"));
        assert!(xml.contains("<Words>500</Words>"));

        // Round trip through the parser.
        let parsed = parse_app_properties(xml.as_bytes()).unwrap();
        assert_eq!(parsed.application, Some("ooxml-wml".to_string()));
        assert_eq!(parsed.pages, Some(3));
        assert_eq!(parsed.words, Some(500));
    }

    /// Markup-significant characters in a value are escaped on output.
    #[test]
    fn test_serialize_core_properties_xml_escape() {
        let props = CoreProperties {
            title: Some("A & B < C".to_string()),
            ..Default::default()
        };
        let bytes = serialize_core_properties(&props).unwrap();
        let xml = String::from_utf8(bytes).unwrap();
        // The writer escapes text content, so the serialized form carries
        // entity references rather than the raw `&` and `<`.
        assert!(xml.contains("<dc:title>A &amp; B &lt; C</dc:title>"));
    }

    /// Every supported app property is read from a complete document.
    #[test]
    fn test_parse_app_properties() {
        let xml = br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
  xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
  <Application>Microsoft Office Word</Application>
  <AppVersion>16.0000</AppVersion>
  <Company>Test Corp</Company>
  <Manager>Project Lead</Manager>
  <TotalTime>120</TotalTime>
  <Pages>5</Pages>
  <Words>1234</Words>
  <Characters>6789</Characters>
  <CharactersWithSpaces>8000</CharactersWithSpaces>
  <Paragraphs>45</Paragraphs>
  <Lines>100</Lines>
  <Template>Normal.dotm</Template>
  <DocSecurity>0</DocSecurity>
</Properties>"#;

        let props = parse_app_properties(xml).unwrap();

        assert_eq!(props.application, Some("Microsoft Office Word".to_string()));
        assert_eq!(props.app_version, Some("16.0000".to_string()));
        assert_eq!(props.company, Some("Test Corp".to_string()));
        assert_eq!(props.manager, Some("Project Lead".to_string()));
        assert_eq!(props.total_time, Some(120));
        assert_eq!(props.pages, Some(5));
        assert_eq!(props.words, Some(1234));
        assert_eq!(props.characters, Some(6789));
        assert_eq!(props.characters_with_spaces, Some(8000));
        assert_eq!(props.paragraphs, Some(45));
        assert_eq!(props.lines, Some(100));
        assert_eq!(props.template, Some("Normal.dotm".to_string()));
        assert_eq!(props.doc_security, Some(0));
    }
}