1mod a_graphic;
2mod a_graphic_data;
3mod attributes;
4mod bookmark_end;
5mod bookmark_start;
6mod cell_margins;
7mod comment;
8mod comment_extended;
9mod comments;
10mod comments_extended;
11mod custom_properties;
12mod delete;
13mod div;
14mod doc_defaults;
15mod doc_grid;
16mod document;
17mod document_rels;
18mod drawing;
19mod errors;
20mod font_group;
21mod font_scheme;
22mod footer;
23mod frame_property;
24mod from_xml;
25mod header;
26mod header_or_footer_rels;
27mod hyperlink;
28mod ignore;
29mod insert;
30mod level;
31mod level_override;
32mod mc_fallback;
33mod numbering_property;
34mod numberings;
35mod page_num_type;
36mod paragraph;
37mod paragraph_property;
38mod paragraph_property_change;
39mod pic;
40mod positional_tab;
41mod read_zip;
42mod rels;
43mod run;
44mod run_property;
45mod section_property;
46mod settings;
47mod shading;
48mod shape;
49mod structured_data_tag;
50mod style;
51mod styles;
52mod tab;
53mod table;
54mod table_borders;
55mod table_cell;
56mod table_cell_borders;
57mod table_cell_margins;
58mod table_cell_property;
59mod table_position_property;
60mod table_property;
61mod table_row;
62mod tabs;
63mod text_box_content;
64mod theme;
65mod web_settings;
66mod wp_anchor;
67mod wps_shape;
68mod wps_text_box;
69mod xml_element;
70
71use std::{collections::HashMap, io::Cursor, path::PathBuf};
72
73use crate::documents::*;
74
75pub use attributes::*;
76pub use document_rels::*;
77pub use errors::ReaderError;
78pub use from_xml::*;
79pub use read_zip::*;
80pub use xml_element::*;
81use zip::ZipArchive;
82
83use self::header_or_footer_rels::{read_header_or_footer_rels, ReadHeaderOrFooterRels};
84
// Relationship type URIs used to locate the individual parts of a docx package.
// The first two are looked up in the package-level `_rels/.rels`; the remaining
// ones are looked up in the relationships of the main document part (and, for
// images, of header/footer parts).

/// Relationship type of the main document part.
const DOC_RELATIONSHIP_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
/// Relationship type of the custom document properties part.
const CUSTOM_PROPERTIES_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties";
/// Relationship type of the styles part.
const STYLE_RELATIONSHIP_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
/// Relationship type of the numbering definitions part.
const NUMBERING_RELATIONSHIP_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering";
/// Relationship type of the document settings part.
const SETTINGS_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings";
/// Relationship type of the comments part.
const COMMENTS_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments";
/// Relationship type of the web settings part.
const WEB_SETTINGS_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings";
/// Relationship type of a header part.
const HEADER_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header";
/// Relationship type of a footer part.
const FOOTER_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer";
/// Relationship type of a theme part.
const THEME_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme";
/// Relationship type of an embedded image.
const IMAGE_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";
/// Relationship type of a hyperlink target.
const HYPERLINK_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
/// Relationship type of the extended comments part (Microsoft Office 2011 namespace).
const COMMENTS_EXTENDED_TYPE: &str =
    "http://schemas.microsoft.com/office/2011/relationships/commentsExtended";
113
114fn read_headers(
115 rels: &ReadDocumentRels,
116 archive: &mut ZipArchive<Cursor<&[u8]>>,
117) -> HashMap<RId, (Header, ReadHeaderOrFooterRels)> {
118 let header_paths = rels.find_target_path(HEADER_TYPE);
119 let headers: HashMap<RId, (Header, ReadHeaderOrFooterRels)> = header_paths
120 .unwrap_or_default()
121 .into_iter()
122 .filter_map(|(rid, path, ..)| {
123 let data = read_zip(archive, path.to_str().expect("should have header path."));
124 if let Ok(d) = data {
125 if let Ok(h) = Header::from_xml(&d[..]) {
126 let rels = read_header_or_footer_rels(archive, path).unwrap_or_default();
127 return Some((rid, (h, rels)));
128 }
129 }
130 None
131 })
132 .collect();
133 headers
134}
135
136fn read_footers(
137 rels: &ReadDocumentRels,
138 archive: &mut ZipArchive<Cursor<&[u8]>>,
139) -> HashMap<RId, (Footer, ReadHeaderOrFooterRels)> {
140 let footer_paths = rels.find_target_path(FOOTER_TYPE);
141 let footers: HashMap<RId, (Footer, ReadHeaderOrFooterRels)> = footer_paths
142 .unwrap_or_default()
143 .into_iter()
144 .filter_map(|(rid, path, ..)| {
145 let data = read_zip(archive, path.to_str().expect("should have footer path."));
146 if let Ok(d) = data {
147 if let Ok(h) = Footer::from_xml(&d[..]) {
148 let rels = read_header_or_footer_rels(archive, path).unwrap_or_default();
149 return Some((rid, (h, rels)));
150 }
151 }
152 None
153 })
154 .collect();
155 footers
156}
157
158fn read_themes(rels: &ReadDocumentRels, archive: &mut ZipArchive<Cursor<&[u8]>>) -> Vec<Theme> {
159 let theme_paths = rels.find_target_path(THEME_TYPE);
160 theme_paths
161 .unwrap_or_default()
162 .into_iter()
163 .filter_map(|(_rid, path, ..)| {
164 let data = read_zip(archive, path.to_str().expect("should have footer path."));
165 if let Ok(d) = data {
166 if let Ok(h) = Theme::from_xml(&d[..]) {
167 return Some(h);
168 }
169 }
170 None
171 })
172 .collect()
173}
174
175pub fn read_docx(buf: &[u8]) -> Result<Docx, ReaderError> {
176 let mut docx = Docx::new();
177 let cur = Cursor::new(buf);
178 let mut archive = zip::ZipArchive::new(cur)?;
179 let _content_types = {
182 let data = read_zip(&mut archive, "[Content_Types].xml")?;
183 ContentTypes::from_xml(&data[..])?
184 };
185
186 let rels = {
189 let data = read_zip(&mut archive, "_rels/.rels")?;
190 Rels::from_xml(&data[..])?
191 };
192
193 let main_rel = rels
196 .find_target(DOC_RELATIONSHIP_TYPE)
197 .ok_or(ReaderError::DocumentNotFoundError);
198
199 let document_path = if let Ok(rel) = main_rel {
200 rel.2.clone()
201 } else {
202 "word/document.xml".to_owned()
203 };
204
205 if let Some(custom_props) = rels.find_target(CUSTOM_PROPERTIES_TYPE) {
206 let data = read_zip(&mut archive, &custom_props.2);
207 if let Ok(data) = data {
208 if let Ok(custom) = CustomProps::from_xml(&data[..]) {
209 docx.doc_props.custom = custom;
210 }
211 }
212 }
213
214 let rels = read_document_rels(&mut archive, &document_path)?;
215
216 let headers = read_headers(&rels, &mut archive);
217 let footers = read_footers(&rels, &mut archive);
218
219 docx.themes = read_themes(&rels, &mut archive);
220
221 let comments_extended_path = rels.find_target_path(COMMENTS_EXTENDED_TYPE);
223 let comments_extended = if let Some(comments_extended_path) = comments_extended_path {
224 if let Some((_, comments_extended_path, ..)) = comments_extended_path.first() {
225 let data = read_zip(
226 &mut archive,
227 comments_extended_path
228 .to_str()
229 .expect("should have comments extended."),
230 );
231 if let Ok(data) = data {
232 CommentsExtended::from_xml(&data[..])?
233 } else {
234 CommentsExtended::default()
235 }
236 } else {
237 CommentsExtended::default()
238 }
239 } else {
240 CommentsExtended::default()
241 };
242
243 let comments_path = rels.find_target_path(COMMENTS_TYPE);
245 let comments = if let Some(paths) = comments_path {
246 if let Some((_, comments_path, ..)) = paths.first() {
247 let data = read_zip(
248 &mut archive,
249 comments_path.to_str().expect("should have comments."),
250 );
251 if let Ok(data) = data {
252 let mut comments = Comments::from_xml(&data[..])?.into_inner();
253 for i in 0..comments.len() {
254 let c = &comments[i];
255 let extended = comments_extended.children.iter().find(|ex| {
256 for child in &c.children {
257 if let CommentChild::Paragraph(p) = child {
258 if ex.paragraph_id == p.id {
259 return true;
260 }
261 }
262 }
263 false
264 });
265 if let Some(CommentExtended {
266 parent_paragraph_id: Some(parent_paragraph_id),
267 ..
268 }) = extended
269 {
270 if let Some(parent_comment) = comments.iter().find(|c| {
271 for child in &c.children {
272 if let CommentChild::Paragraph(p) = child {
273 if &p.id == parent_paragraph_id {
274 return true;
275 }
276 }
277 }
278 false
279 }) {
280 comments[i].parent_comment_id = Some(parent_comment.id);
281 }
282 }
283 }
284 Comments { comments }
285 } else {
286 Comments::default()
287 }
288 } else {
289 Comments::default()
290 }
291 } else {
292 Comments::default()
293 };
294
295 let document = {
296 let data = read_zip(&mut archive, &document_path)?;
297 Document::from_xml(&data[..])?
298 };
299 docx = docx.document(document);
300
301 if let Some(h) = docx.document.section_property.header_reference.clone() {
303 if let Some((header, rels)) = headers.get(&h.id) {
304 docx.document = docx.document.header(header.clone(), &h.id);
305 let count = docx.document_rels.header_count + 1;
306 docx.document_rels.header_count = count;
307 docx.content_type = docx.content_type.add_header();
308 let media = rels.find_target_path(IMAGE_TYPE);
310 docx = add_images(docx, media, &mut archive);
311 }
312 }
313 if let Some(ref h) = docx
314 .document
315 .section_property
316 .first_header_reference
317 .clone()
318 {
319 if let Some((header, rels)) = headers.get(&h.id) {
320 docx.document = docx
321 .document
322 .first_header_without_title_pg(header.clone(), &h.id);
323 let count = docx.document_rels.header_count + 1;
324 docx.document_rels.header_count = count;
325 docx.content_type = docx.content_type.add_header();
326 let media = rels.find_target_path(IMAGE_TYPE);
328 docx = add_images(docx, media, &mut archive);
329 }
330 }
331 if let Some(ref h) = docx.document.section_property.even_header_reference.clone() {
332 if let Some((header, rels)) = headers.get(&h.id) {
333 docx.document = docx.document.even_header(header.clone(), &h.id);
334 let count = docx.document_rels.header_count + 1;
335 docx.document_rels.header_count = count;
336 docx.content_type = docx.content_type.add_header();
337
338 let media = rels.find_target_path(IMAGE_TYPE);
340 docx = add_images(docx, media, &mut archive);
341 }
342 }
343
344 if let Some(f) = docx.document.section_property.footer_reference.clone() {
346 if let Some((footer, rels)) = footers.get(&f.id) {
347 docx.document = docx.document.footer(footer.clone(), &f.id);
348 let count = docx.document_rels.footer_count + 1;
349 docx.document_rels.footer_count = count;
350 docx.content_type = docx.content_type.add_footer();
351
352 let media = rels.find_target_path(IMAGE_TYPE);
354 docx = add_images(docx, media, &mut archive);
355 }
356 }
357
358 if let Some(ref f) = docx
359 .document
360 .section_property
361 .first_footer_reference
362 .clone()
363 {
364 if let Some((footer, rels)) = footers.get(&f.id) {
365 docx.document = docx
366 .document
367 .first_footer_without_title_pg(footer.clone(), &f.id);
368 let count = docx.document_rels.footer_count + 1;
369 docx.document_rels.footer_count = count;
370 docx.content_type = docx.content_type.add_footer();
371
372 let media = rels.find_target_path(IMAGE_TYPE);
374 docx = add_images(docx, media, &mut archive);
375 }
376 }
377 if let Some(ref f) = docx.document.section_property.even_footer_reference.clone() {
378 if let Some((footer, rels)) = footers.get(&f.id) {
379 docx.document = docx.document.even_footer(footer.clone(), &f.id);
380 let count = docx.document_rels.footer_count + 1;
381 docx.document_rels.footer_count = count;
382 docx.content_type = docx.content_type.add_footer();
383
384 let media = rels.find_target_path(IMAGE_TYPE);
386 docx = add_images(docx, media, &mut archive);
387 }
388 }
389
390 if !comments.inner().is_empty() {
392 docx.store_comments(comments.inner());
393 docx = docx.comments(comments);
394 docx = docx.comments_extended(comments_extended);
395 }
396
397 let style_path = rels.find_target_path(STYLE_RELATIONSHIP_TYPE);
400 if let Some(paths) = style_path {
401 if let Some((_, style_path, ..)) = paths.first() {
402 let data = read_zip(
403 &mut archive,
404 style_path.to_str().expect("should have styles"),
405 )?;
406 let styles = Styles::from_xml(&data[..])?;
407 docx = docx.styles(styles);
408 }
409 }
410
411 let num_path = rels.find_target_path(NUMBERING_RELATIONSHIP_TYPE);
413 if let Some(paths) = num_path {
414 if let Some((_, num_path, ..)) = paths.first() {
415 let data = read_zip(
416 &mut archive,
417 num_path.to_str().expect("should have numberings"),
418 )?;
419 let nums = Numberings::from_xml(&data[..])?;
420 docx = docx.numberings(nums);
421 }
422 }
423
424 let settings_path = rels.find_target_path(SETTINGS_TYPE);
426 if let Some(paths) = settings_path {
427 if let Some((_, settings_path, ..)) = paths.first() {
428 let data = read_zip(
429 &mut archive,
430 settings_path.to_str().expect("should have settings"),
431 )?;
432 let settings = Settings::from_xml(&data[..])?;
433 docx = docx.settings(settings);
434 }
435 }
436
437 let web_settings_path = rels.find_target_path(WEB_SETTINGS_TYPE);
439 if let Some(paths) = web_settings_path {
440 if let Some((_, web_settings_path, ..)) = paths.first() {
441 let data = read_zip(
442 &mut archive,
443 web_settings_path
444 .to_str()
445 .expect("should have web settings"),
446 )?;
447 let web_settings = WebSettings::from_xml(&data[..])?;
448 docx = docx.web_settings(web_settings);
449 }
450 }
451 let media = rels.find_target_path(IMAGE_TYPE);
453 docx = add_images(docx, media, &mut archive);
454
455 let links = rels.find_target_path(HYPERLINK_TYPE);
457 if let Some(paths) = links {
458 for (id, target, mode) in paths {
459 if let Some(mode) = mode {
460 docx =
461 docx.add_hyperlink(id, target.to_str().expect("should convert to str"), mode);
462 }
463 }
464 }
465
466 Ok(docx)
467}
468
469fn add_images(
470 mut docx: Docx,
471 media: Option<Vec<(RId, PathBuf, Option<String>)>>,
472 archive: &mut ZipArchive<Cursor<&[u8]>>,
473) -> Docx {
474 if let Some(paths) = media {
476 for (id, media, ..) in paths {
477 if let Ok(data) = read_zip(archive, media.to_str().expect("should have media")) {
478 docx = docx.add_image(id, media.to_str().unwrap().to_string(), data);
479 }
480 }
481 }
482 docx
483}