docx_reader/reader/
mod.rs1mod a_graphic;
2mod a_graphic_data;
3mod attributes;
4mod custom_properties;
5mod delete;
6mod div;
7mod doc_defaults;
8mod doc_grid;
9mod document;
10mod document_rels;
11mod drawing;
12mod errors;
13mod font_group;
14mod font_scheme;
15mod footer;
16mod from_xml;
17mod header;
18mod hyperlink;
19mod ignore;
20mod insert;
21mod level;
22mod level_override;
23mod mc_fallback;
24mod numbering_property;
25mod numberings;
26mod paragraph;
27mod paragraph_property;
28mod paragraph_property_change;
29mod pic;
30mod read_zip;
31mod rels;
32mod run;
33mod run_property;
34mod section_property;
35mod settings;
36mod shading;
37mod shape;
38mod structured_data_tag;
39mod style;
40mod styles;
41mod tab;
42mod table;
43mod table_borders;
44mod table_cell;
45mod table_cell_borders;
46mod table_cell_margins;
47mod table_cell_property;
48mod table_property;
49mod table_row;
50mod tabs;
51mod text_box_content;
52mod theme;
53mod web_settings;
54mod wp_anchor;
55mod wps_shape;
56mod wps_text_box;
57mod xml_element;
58
59use std::{collections::HashMap, io::Cursor};
60use zip::ZipArchive;
61
62use crate::documents::*;
63
64pub use attributes::*;
65pub use document_rels::*;
66pub use errors::ReaderError;
67pub use from_xml::*;
68pub use mc_fallback::*;
69pub use read_zip::*;
70pub use xml_element::*;
71
// Relationship type URIs used to locate parts inside the package.
// The first two are looked up in the package-level `_rels/.rels`; the rest are
// looked up in the relationships of the main document part.

/// Relationship type used to find the main document part.
const DOC_RELATIONSHIP_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
/// Relationship type used to find the custom properties part.
const CUSTOM_PROPERTIES_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties";
/// Relationship type used to find the styles part.
const STYLE_RELATIONSHIP_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
/// Relationship type used to find the numbering part.
const NUMBERING_RELATIONSHIP_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering";
/// Relationship type used to find the settings part.
const SETTINGS_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings";
/// Relationship type used to find the web settings part.
const WEB_SETTINGS_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings";
/// Relationship type used to find header parts.
const HEADER_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header";
/// Relationship type used to find footer parts.
const FOOTER_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer";
/// Relationship type used to find theme parts.
const THEME_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme";
/// Relationship type used to find embedded image parts.
const IMAGE_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";
/// Relationship type used to find hyperlink targets.
const HYPERLINK_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
95
96fn read_headers(
97 rels: &ReadDocumentRels,
98 archive: &mut ZipArchive<Cursor<&[u8]>>,
99) -> HashMap<RId, Header> {
100 let header_paths = rels.find_target_path(HEADER_TYPE);
101 let headers: HashMap<RId, Header> = header_paths
102 .unwrap_or_default()
103 .into_iter()
104 .filter_map(|(rid, path, ..)| {
105 let data = read_zip(archive, path.to_str().expect("should have header path."));
106 if let Ok(d) = data {
107 if let Ok(h) = Header::from_xml(&d[..]) {
108 return Some((rid, h));
109 }
110 }
111 None
112 })
113 .collect();
114 headers
115}
116
117fn read_footers(
118 rels: &ReadDocumentRels,
119 archive: &mut ZipArchive<Cursor<&[u8]>>,
120) -> HashMap<RId, Footer> {
121 let footer_paths = rels.find_target_path(FOOTER_TYPE);
122 let footers: HashMap<RId, Footer> = footer_paths
123 .unwrap_or_default()
124 .into_iter()
125 .filter_map(|(rid, path, ..)| {
126 let data = read_zip(archive, path.to_str().expect("should have footer path."));
127 if let Ok(d) = data {
128 if let Ok(h) = Footer::from_xml(&d[..]) {
129 return Some((rid, h));
130 }
131 }
132 None
133 })
134 .collect();
135 footers
136}
137
138fn read_themes(rels: &ReadDocumentRels, archive: &mut ZipArchive<Cursor<&[u8]>>) -> Vec<Theme> {
139 let theme_paths = rels.find_target_path(THEME_TYPE);
140 theme_paths
141 .unwrap_or_default()
142 .into_iter()
143 .filter_map(|(_rid, path, ..)| {
144 let data = read_zip(archive, path.to_str().expect("should have footer path."));
145 if let Ok(d) = data {
146 if let Ok(h) = Theme::from_xml(&d[..]) {
147 return Some(h);
148 }
149 }
150 None
151 })
152 .collect()
153}
154
155pub fn read_docx(buf: &[u8]) -> Result<Docx, ReaderError> {
156 let mut docx = Docx::new();
157 let cur = Cursor::new(buf);
158 let mut archive = zip::ZipArchive::new(cur)?;
159 let _content_types = {
162 let data = read_zip(&mut archive, "[Content_Types].xml")?;
163 ContentTypes::from_xml(&data[..])?
164 };
165
166 let rels = {
169 let data = read_zip(&mut archive, "_rels/.rels")?;
170 Rels::from_xml(&data[..])?
171 };
172
173 let main_rel = rels
176 .find_target(DOC_RELATIONSHIP_TYPE)
177 .ok_or(ReaderError::DocumentNotFoundError);
178
179 let document_path = if let Ok(rel) = main_rel {
180 rel.2.clone()
181 } else {
182 "word/document.xml".to_owned()
183 };
184
185 if let Some(custom_props) = rels.find_target(CUSTOM_PROPERTIES_TYPE) {
186 let data = read_zip(&mut archive, &custom_props.2);
187 if let Ok(data) = data {
188 if let Ok(custom) = CustomProps::from_xml(&data[..]) {
189 docx.doc_props.custom = custom;
190 }
191 }
192 }
193
194 let rels = read_document_rels(&mut archive, &document_path)?;
195
196 let headers = read_headers(&rels, &mut archive);
197 let footers = read_footers(&rels, &mut archive);
198
199 docx.themes = read_themes(&rels, &mut archive);
200
201 if let Some(h) = docx.document.section_property.header_reference.clone() {
203 if let Some(header) = headers.get(&h.id) {
204 docx.document = docx.document.header(header.clone(), &h.id);
205 let count = docx.document_rels.header_count + 1;
206 docx.document_rels.header_count = count;
207 docx.content_type = docx.content_type.add_header();
208 }
209 }
210 if let Some(ref h) = docx
211 .document
212 .section_property
213 .first_header_reference
214 .clone()
215 {
216 if let Some(header) = headers.get(&h.id) {
217 docx.document = docx.document.first_header(header.clone(), &h.id);
218 let count = docx.document_rels.header_count + 1;
219 docx.document_rels.header_count = count;
220 docx.content_type = docx.content_type.add_header();
221 }
222 }
223 if let Some(ref h) = docx.document.section_property.even_header_reference.clone() {
224 if let Some(header) = headers.get(&h.id) {
225 docx.document = docx.document.even_header(header.clone(), &h.id);
226 let count = docx.document_rels.header_count + 1;
227 docx.document_rels.header_count = count;
228 docx.content_type = docx.content_type.add_header();
229 }
230 }
231
232 if let Some(f) = docx.document.section_property.footer_reference.clone() {
234 if let Some(footer) = footers.get(&f.id) {
235 docx.document = docx.document.footer(footer.clone(), &f.id);
236 let count = docx.document_rels.footer_count + 1;
237 docx.document_rels.footer_count = count;
238 docx.content_type = docx.content_type.add_footer();
239 }
240 }
241
242 if let Some(ref f) = docx
243 .document
244 .section_property
245 .first_footer_reference
246 .clone()
247 {
248 if let Some(footer) = footers.get(&f.id) {
249 docx.document = docx.document.first_footer(footer.clone(), &f.id);
250 let count = docx.document_rels.footer_count + 1;
251 docx.document_rels.footer_count = count;
252 docx.content_type = docx.content_type.add_footer();
253 }
254 }
255 if let Some(ref f) = docx.document.section_property.even_footer_reference.clone() {
256 if let Some(footer) = footers.get(&f.id) {
257 docx.document = docx.document.even_footer(footer.clone(), &f.id);
258 let count = docx.document_rels.footer_count + 1;
259 docx.document_rels.footer_count = count;
260 docx.content_type = docx.content_type.add_footer();
261 }
262 }
263
264 let style_path = rels.find_target_path(STYLE_RELATIONSHIP_TYPE);
267 if let Some(paths) = style_path {
268 if let Some((_, style_path, ..)) = paths.get(0) {
269 let data = read_zip(
270 &mut archive,
271 style_path.to_str().expect("should have styles"),
272 )?;
273 let styles = Styles::from_xml(&data[..])?;
274 docx = docx.styles(styles);
275 }
276 }
277
278 let num_path = rels.find_target_path(NUMBERING_RELATIONSHIP_TYPE);
280 if let Some(paths) = num_path {
281 if let Some((_, num_path, ..)) = paths.get(0) {
282 let data = read_zip(
283 &mut archive,
284 num_path.to_str().expect("should have numberings"),
285 )?;
286 let nums = Numberings::from_xml(&data[..])?;
287 docx = docx.numberings(nums);
288 }
289 }
290
291 let settings_path = rels.find_target_path(SETTINGS_TYPE);
293 if let Some(paths) = settings_path {
294 if let Some((_, settings_path, ..)) = paths.get(0) {
295 let data = read_zip(
296 &mut archive,
297 settings_path.to_str().expect("should have settings"),
298 )?;
299 let settings = Settings::from_xml(&data[..])?;
300 docx = docx.settings(settings);
301 }
302 }
303
304 let web_settings_path = rels.find_target_path(WEB_SETTINGS_TYPE);
306 if let Some(paths) = web_settings_path {
307 if let Some((_, web_settings_path, ..)) = paths.get(0) {
308 let data = read_zip(
309 &mut archive,
310 web_settings_path
311 .to_str()
312 .expect("should have web settings"),
313 )?;
314 let web_settings = WebSettings::from_xml(&data[..])?;
315 docx = docx.web_settings(web_settings);
316 }
317 }
318 let media = rels.find_target_path(IMAGE_TYPE);
320 if let Some(paths) = media {
321 for (id, media, ..) in paths {
322 if let Ok(data) = read_zip(&mut archive, media.to_str().expect("should have media")) {
323 docx = docx.add_image(id, media.to_str().unwrap().to_string(), data);
324 }
325 }
326 }
327
328 let links = rels.find_target_path(HYPERLINK_TYPE);
330 if let Some(paths) = links {
331 for (id, target, mode) in paths {
332 if let Some(mode) = mode {
333 docx =
334 docx.add_hyperlink(id, target.to_str().expect("should convert to str"), mode);
335 }
336 }
337 }
338
339 Ok(docx)
340}