docx_rust/
docx.rs

1use hard_xml::{XmlRead, XmlWrite, XmlWriter};
2use std::collections::HashMap;
3use std::fs::File;
4use std::io::{Read, Seek, Write};
5use std::path::Path;
6use zip::write::SimpleFileOptions;
7use zip::{result::ZipError, CompressionMethod, ZipArchive, ZipWriter};
8
9use crate::document::{Comments, EndNotes, FootNotes, Footer, Header, Numbering, Theme};
10use crate::media::MediaType;
11use crate::schema::{
12    SCHEMA_COMMENTS, SCHEMA_ENDNOTES, SCHEMA_FOOTNOTES, SCHEMA_HEADER, SCHEMA_NUMBERING,
13    SCHEMA_SETTINGS, SCHEMA_THEME, SCHEMA_WEB_SETTINGS,
14};
15use crate::settings::Settings;
16use crate::web_settings::WebSettings;
17use crate::{
18    app::App,
19    content_type::ContentTypes,
20    core::Core,
21    document::Document,
22    error::DocxResult,
23    font_table::FontTable,
24    rels::Relationships,
25    schema::{
26        SCHEMA_CORE, SCHEMA_FONT_TABLE, SCHEMA_OFFICE_DOCUMENT, SCHEMA_REL_EXTENDED, SCHEMA_STYLES,
27    },
28    styles::Styles,
29};
30
/// A WordprocessingML package
///
/// Each field corresponds to one part (or one family of parts) of the archive.
/// `Docx::write` serializes them to the entry names noted on the fields below.
#[derive(Debug, Default, Clone)]
pub struct Docx<'a> {
    /// Specifies package-level properties part (written as `docProps/app.xml`)
    pub app: Option<App<'a>>,
    /// Specifies core properties part (written as `docProps/core.xml`)
    pub core: Option<Core<'a>>,
    /// Specifies the content type of relationship parts and the main document part.
    /// Written as `[Content_Types].xml`.
    pub content_types: ContentTypes<'a>,
    /// Specifies the main document part (written as `word/document.xml`).
    pub document: Document<'a>,
    /// Specifies the font table part (written as `word/fontTable.xml`)
    pub font_table: Option<FontTable<'a>>,
    /// Specifies the style definitions part (always written, as `word/styles.xml`)
    pub styles: Styles<'a>,
    /// Specifies the package-level relationship to the main document part
    /// (written as `_rels/.rels`)
    pub rels: Relationships<'a>,
    /// Specifies the part-level relationship to the main document part
    /// (written as `word/_rels/document.xml.rels`)
    pub document_rels: Option<Relationships<'a>>,
    /// Relationships of the settings part (written as `word/_rels/settings.xml.rels`)
    pub settings_rels: Option<Relationships<'a>>,
    /// Header parts; the key is used both as the relationship target and as the
    /// entry name below `word/` when writing.
    pub headers: HashMap<String, Header<'a>>,
    /// Footer parts, keyed the same way as `headers`.
    pub footers: HashMap<String, Footer<'a>>,
    /// Theme parts, keyed the same way as `headers`.
    pub themes: HashMap<String, Theme<'a>>,
    /// Media files, keyed the same way as `headers`; the value pairs the detected
    /// media type with the borrowed raw bytes that are written verbatim.
    pub media: HashMap<String, (MediaType, &'a Vec<u8>)>,
    /// Footnotes part (written as `word/footnotes.xml`)
    pub footnotes: Option<FootNotes<'a>>,
    /// Endnotes part (written as `word/endnotes.xml`)
    pub endnotes: Option<EndNotes<'a>>,
    /// Settings part (written as `word/settings.xml`)
    pub settings: Option<Settings<'a>>,
    /// Web settings part (written as `word/webSettings.xml`)
    pub web_settings: Option<WebSettings>,
    /// Comments part (written as `word/comments.xml`)
    pub comments: Option<Comments<'a>>,
    /// Numbering definitions part (written as `word/numbering.xml`)
    pub numbering: Option<Numbering<'a>>,
}
62
63impl<'a> Docx<'a> {
64    pub fn write<W: Write + Seek>(&'a mut self, writer: W) -> DocxResult<W> {
65        let mut writer = XmlWriter::new(ZipWriter::new(writer));
66
67        let opt = SimpleFileOptions::default()
68            .compression_method(CompressionMethod::Deflated)
69            .unix_permissions(0o755);
70
71        // ==== Add Relationships ====
72
73        if self.app.is_some() {
74            self.rels.add_rel(SCHEMA_REL_EXTENDED, "docProps/app.xml");
75        }
76
77        if self.core.is_some() {
78            self.rels.add_rel(SCHEMA_CORE, "docProps/core.xml");
79        }
80
81        self.rels
82            .add_rel(SCHEMA_OFFICE_DOCUMENT, "word/document.xml");
83
84        self.document_rels
85            .get_or_insert(Relationships::default())
86            .add_rel(SCHEMA_STYLES, "styles.xml");
87
88        if self.font_table.is_some() {
89            self.document_rels
90                .get_or_insert(Relationships::default())
91                .add_rel(SCHEMA_FONT_TABLE, "fontTable.xml");
92        }
93
94        if self.footnotes.is_some() {
95            self.document_rels
96                .get_or_insert(Relationships::default())
97                .add_rel(SCHEMA_FOOTNOTES, "footnotes.xml");
98        }
99
100        if self.endnotes.is_some() {
101            self.document_rels
102                .get_or_insert(Relationships::default())
103                .add_rel(SCHEMA_ENDNOTES, "endnotes.xml");
104        }
105
106        if self.settings.is_some() {
107            self.document_rels
108                .get_or_insert(Relationships::default())
109                .add_rel(SCHEMA_SETTINGS, "settings.xml");
110        }
111
112        if self.web_settings.is_some() {
113            self.document_rels
114                .get_or_insert(Relationships::default())
115                .add_rel(SCHEMA_WEB_SETTINGS, "webSettings.xml");
116        }
117
118        if self.comments.is_some() {
119            self.document_rels
120                .get_or_insert(Relationships::default())
121                .add_rel(SCHEMA_COMMENTS, "comments.xml");
122        }
123
124        if self.numbering.is_some() {
125            self.document_rels
126                .get_or_insert(Relationships::default())
127                .add_rel(SCHEMA_NUMBERING, "numbering.xml");
128        }
129
130        for hd in &self.headers {
131            self.document_rels
132                .get_or_insert(Relationships::default())
133                .add_rel(SCHEMA_HEADER, hd.0);
134        }
135
136        for ft in &self.footers {
137            self.document_rels
138                .get_or_insert(Relationships::default())
139                .add_rel(SCHEMA_HEADER, ft.0);
140        }
141
142        for theme in &self.themes {
143            self.document_rels
144                .get_or_insert(Relationships::default())
145                .add_rel(SCHEMA_THEME, theme.0);
146        }
147
148        for media in &self.media {
149            let rel = crate::media::get_media_type_relation_type(&media.1 .0);
150            self.document_rels
151                .get_or_insert(Relationships::default())
152                .add_rel(rel, media.0);
153        }
154
155        // ==== Write Zip Item ====
156
157        macro_rules! write_xml {
158            (Some($xml:expr) => $name:tt) => {
159                if let Some(ref xml) = $xml {
160                    write_xml!(xml => $name);
161                }
162            };
163            (Some($xml:expr) => $name:tt $($rest:tt)*) => {
164                write_xml!(Some($xml) => $name);
165                write_xml!($($rest)*);
166            };
167            ($xml:expr => $name:tt) => {
168                writer.inner.start_file($name, opt)?;
169                $xml.to_writer(&mut writer)?;
170            };
171            ($xml:expr => $name:tt $($rest:tt)*) => {
172                write_xml!($xml => $name);
173                write_xml!($($rest)*);
174            };
175        }
176
177        write_xml!(
178            self.content_types        => "[Content_Types].xml"
179            Some(self.app)            => "docProps/app.xml"
180            Some(self.core)           => "docProps/core.xml"
181            self.rels                 => "_rels/.rels"
182            self.document             => "word/document.xml"
183            self.styles               => "word/styles.xml"
184            Some(self.font_table)     => "word/fontTable.xml"
185            Some(self.footnotes)      => "word/footnotes.xml"
186            Some(self.endnotes)       => "word/endnotes.xml"
187            Some(self.settings)       => "word/settings.xml"
188            Some(self.web_settings)   => "word/webSettings.xml"
189            Some(self.comments)       => "word/comments.xml"
190            Some(self.numbering)      => "word/numbering.xml"
191            Some(self.document_rels)  => "word/_rels/document.xml.rels"
192            Some(self.settings_rels)  => "word/_rels/settings.xml.rels"
193        );
194
195        for hd in self.headers.iter() {
196            let file_path = format!("word/{}", hd.0);
197            let content = hd.1;
198            write_xml!(
199                content => file_path
200            );
201        }
202
203        for hd in self.footers.iter() {
204            let file_path = format!("word/{}", hd.0);
205            let content = hd.1;
206            write_xml!(
207                content => file_path
208            );
209        }
210
211        for theme in self.themes.iter() {
212            let file_path = format!("word/{}", theme.0);
213            let content = theme.1;
214            write_xml!(
215                content => file_path
216            );
217        }
218
219        for media in self.media.iter() {
220            let file_path = format!("word/{}", media.0);
221            writer.inner.start_file(file_path, opt)?;
222            writer.inner.write_all(media.1 .1)?;
223        }
224
225        Ok(writer.inner.finish()?)
226    }
227
228    pub fn write_file<P: AsRef<Path>>(&'a mut self, path: P) -> DocxResult<File> {
229        if let Some(p) = path.as_ref().parent() {
230            std::fs::create_dir_all(p)?;
231        }
232        let file = File::create(path)?;
233        self.write(file)
234    }
235}
236
/// An extracted docx file
///
/// Holds the raw, still unparsed content of the archive entries read by
/// `DocxFile::from_reader`. `DocxFile::parse` turns it into a `Docx` that borrows
/// from these buffers.
pub struct DocxFile {
    /// Text of `docProps/app.xml`, if present.
    app: Option<String>,
    /// Text of `[Content_Types].xml`.
    content_types: String,
    /// Text of `docProps/core.xml`, if present.
    core: Option<String>,
    /// Text of `word/document.xml`.
    document: String,
    /// Text of `word/_rels/document.xml.rels`, if present.
    document_rels: Option<String>,
    /// Text of `word/_rels/settings.xml.rels`, if present.
    settings_rels: Option<String>,
    /// Text of `word/fontTable.xml`, if present.
    font_table: Option<String>,
    /// Text of `_rels/.rels`.
    rels: String,
    /// Text of `word/styles.xml`, if present.
    styles: Option<String>,
    /// Text of `word/settings.xml`, if present.
    settings: Option<String>,
    /// Text of `word/webSettings.xml`, if present.
    web_settings: Option<String>,
    /// `(entry name, text)` for each header part found (`word/header…` entries).
    headers: Vec<(String, String)>,
    /// `(entry name, text)` for each footer part found (`word/footer…` entries).
    footers: Vec<(String, String)>,
    /// `(entry name, text)` for each theme part found (`word/theme/theme…` entries).
    themes: Vec<(String, String)>,
    /// `(entry name, raw bytes)` for each media entry found (`word/media…` entries).
    medias: Vec<(String, Vec<u8>)>,
    /// Text of `word/footnotes.xml`, if present.
    footnotes: Option<String>,
    /// Text of `word/endnotes.xml`, if present.
    endnotes: Option<String>,
    /// Text of `word/comments.xml`, if present.
    comments: Option<String>,
    /// Text of `word/numbering.xml`, if present.
    numbering: Option<String>,
}
259
260impl DocxFile {
261    /// Extracts from reader
262    pub fn from_reader<T: Read + Seek>(reader: T) -> DocxResult<Self> {
263        let mut zip = ZipArchive::new(reader)?;
264
265        macro_rules! read {
266            ($xml:tt, $name:expr) => {{
267                let mut file = zip.by_name($name)?;
268                let mut buffer = String::new();
269                file.read_to_string(&mut buffer)?;
270                buffer
271            }};
272        }
273
274        macro_rules! option_read {
275            ($xml:tt, $name:expr) => {
276                match zip.by_name($name) {
277                    Err(ZipError::FileNotFound) => None,
278                    Err(e) => return Err(e.into()),
279                    Ok(mut file) => {
280                        let mut buffer = String::new();
281                        file.read_to_string(&mut buffer)?;
282                        Some(buffer)
283                    }
284                }
285            };
286        }
287
288        macro_rules! option_read_multiple {
289            ($xml:tt, $name:expr) => {{
290                let names: Vec<_> = zip.file_names().map(|x| x.to_string()).collect();
291                let name_and_value: Vec<_> = names
292                    .iter()
293                    .filter(|n| n.contains($name))
294                    .filter_map(|f| {
295                        zip.by_name(f).ok().and_then(|mut file| {
296                            let mut buffer = String::new();
297                            file.read_to_string(&mut buffer).ok()?;
298                            Some((f.to_string(), buffer))
299                        })
300                    })
301                    .collect();
302                name_and_value
303            }};
304        }
305
306        macro_rules! option_read_multiple_files {
307            ($xml:tt, $name:expr) => {{
308                let names: Vec<_> = zip.file_names().map(|x| x.to_string()).collect();
309                let name_and_value: Vec<_> = names
310                    .iter()
311                    .filter(|n| n.contains($name))
312                    .filter_map(|f| {
313                        zip.by_name(f).ok().and_then(|mut file| {
314                            let mut buffer = Vec::new();
315                            file.read_to_end(&mut buffer).ok()?;
316                            Some((f.to_string(), buffer))
317                        })
318                    })
319                    .collect();
320                name_and_value
321            }};
322        }
323
324        let app = option_read!(App, "docProps/app.xml");
325        let content_types = read!(ContentTypes, "[Content_Types].xml");
326        let core = option_read!(Core, "docProps/core.xml");
327        let document_rels = option_read!(Relationships, "word/_rels/document.xml.rels");
328        let settings_rels = option_read!(Relationships, "word/_rels/settings.xml.rels");
329        let document = read!(Document, "word/document.xml");
330        let font_table = option_read!(FontTable, "word/fontTable.xml");
331        let rels = read!(Relationships, "_rels/.rels");
332        let styles = option_read!(Styles, "word/styles.xml");
333        let settings = option_read!(Settings, "word/settings.xml");
334        let web_settings = option_read!(WebSettings, "word/webSettings.xml");
335        let footnotes = option_read!(Footnotes, "word/footnotes.xml");
336        let endnotes = option_read!(Endnotes, "word/endnotes.xml");
337        let comments = option_read!(Comments, "word/comments.xml");
338        let numbering = option_read!(Numbering, "word/numbering.xml");
339
340        let headers = option_read_multiple!(Headers, "word/header");
341        let footers = option_read_multiple!(Footers, "word/footer");
342        let themes = option_read_multiple!(Themes, "word/theme/theme");
343        let medias = option_read_multiple_files!(Medias, "word/media");
344
345        Ok(DocxFile {
346            app,
347            content_types,
348            core,
349            document_rels,
350            settings_rels,
351            document,
352            font_table,
353            rels,
354            styles,
355            settings,
356            web_settings,
357            headers,
358            footers,
359            themes,
360            medias,
361            footnotes,
362            endnotes,
363            comments,
364            numbering,
365        })
366    }
367
368    /// Extracts from file
369    #[inline]
370    pub fn from_file<P: AsRef<Path>>(path: P) -> DocxResult<Self> {
371        Self::from_reader(File::open(path)?)
372    }
373
374    /// Parses content into `Docx` struct
375    pub fn parse(&self) -> DocxResult<Docx<'_>> {
376        let app = if let Some(content) = &self.app {
377            Some(App::from_str(content)?)
378        } else {
379            None
380        };
381
382        let document = Document::from_str(&self.document)?;
383
384        let mut headers = HashMap::new();
385        for f in self.headers.iter() {
386            let hd = Header::from_str(&f.1)?;
387            let name = f.0.replace("word/", "");
388            headers.insert(name, hd);
389        }
390
391        let mut footers = HashMap::new();
392        for f in self.footers.iter() {
393            let ft = Footer::from_str(&f.1)?;
394            let name = f.0.replace("word/", "");
395            footers.insert(name, ft);
396        }
397
398        let mut media = HashMap::new();
399        for m in self.medias.iter() {
400            let mt = crate::media::get_media_type(&m.0);
401            if let Some(mt) = mt {
402                let name = m.0.replace("word/", "");
403                let m = (mt, &m.1);
404                media.insert(name, m);
405            }
406        }
407
408        let mut themes = HashMap::new();
409        // turn off for now
410        for t in self.themes.iter() {
411            let th = Theme::from_str(&t.1)?;
412            let name = t.0.replace("word/", "");
413            themes.insert(name, th);
414        }
415
416        let content_types = ContentTypes::from_str(&self.content_types)?;
417
418        let core = if let Some(content) = &self.core {
419            Some(Core::from_str(content)?)
420        } else {
421            None
422        };
423
424        let document_rels: Option<Relationships> = if let Some(content) = &self.document_rels {
425            Some(Relationships::from_str(content)?)
426        } else {
427            None
428        };
429        let document_rels = document_rels.map(|rel: Relationships| {
430            let rrr: Vec<_> = rel
431                .relationships
432                .iter()
433                .filter(|r2| {
434                    matches!(
435                        r2.ty.to_string().as_str(),
436                        crate::schema::SCHEMA_HEADER
437                            | crate::schema::SCHEMA_FOOTER
438                            | crate::schema::SCHEMA_THEME
439                            | crate::schema::SCHEMA_FONT_TABLE
440                            | crate::schema::SCHEMA_STYLES
441                            | crate::schema::SCHEMA_FOOTNOTES
442                            | crate::schema::SCHEMA_ENDNOTES
443                            | crate::schema::SCHEMA_SETTINGS
444                            | crate::schema::SCHEMA_WEB_SETTINGS
445                            | crate::schema::SCHEMA_COMMENTS
446                            | crate::schema::SCHEMA_IMAGE
447                            | crate::schema::SCHEMA_HYPERLINK
448                            | crate::schema::SCHEMA_NUMBERING
449                    )
450                })
451                .map(|d| d.to_owned())
452                .collect();
453            Relationships { relationships: rrr }
454        });
455
456        let settings_rels = self
457            .settings_rels
458            .as_deref()
459            .map(Relationships::from_str)
460            .transpose()?;
461
462        let font_table = if let Some(content) = &self.font_table {
463            Some(FontTable::from_str(content)?)
464        } else {
465            None
466        };
467
468        let footnotes = if let Some(content) = &self.footnotes {
469            Some(FootNotes::from_str(content)?)
470        } else {
471            None
472        };
473
474        let endnotes = if let Some(content) = &self.endnotes {
475            Some(EndNotes::from_str(content)?)
476        } else {
477            None
478        };
479
480        let settings = if let Some(content) = &self.settings {
481            Some(Settings::from_str(content)?)
482        } else {
483            None
484        };
485
486        let web_settings = if let Some(content) = &self.web_settings {
487            Some(WebSettings::from_str(
488                &content.replace("ns0:", "w:").to_string(),
489            )?)
490        } else {
491            None
492        };
493
494        let comments = if let Some(content) = &self.comments {
495            Some(Comments::from_str(content)?)
496        } else {
497            None
498        };
499
500        let numbering = if let Some(content) = &self.numbering {
501            Some(Numbering::from_str(content)?)
502        } else {
503            None
504        };
505
506        let rels = Relationships::from_str(&self.rels)?;
507        let rels = {
508            let rrr: Vec<_> = rels
509                .relationships
510                .iter()
511                .filter(|r2| {
512                    matches!(
513                        r2.ty.to_string().as_str(),
514                        crate::schema::SCHEMA_CORE
515                            | crate::schema::SCHEMA_REL_EXTENDED
516                            | crate::schema::SCHEMA_OFFICE_DOCUMENT
517                    )
518                })
519                .map(|d| d.to_owned())
520                .collect();
521            Relationships { relationships: rrr }
522        };
523
524        let styles = self
525            .styles
526            .as_ref()
527            .map(|content| Styles::from_str(content))
528            .transpose()?
529            .unwrap_or_default();
530
531        Ok(Docx {
532            app,
533            content_types,
534            core,
535            document,
536            document_rels,
537            settings_rels,
538            font_table,
539            rels,
540            styles,
541            headers,
542            footers,
543            themes,
544            media,
545            footnotes,
546            endnotes,
547            settings,
548            web_settings,
549            comments,
550            numbering,
551        })
552    }
553}