1use crate::error::{Error, Result};
15use std::collections::HashMap;
16use std::io::{Read, Seek, Write};
17use zip::read::ZipArchive;
18use zip::write::ZipWriter;
19
/// A readable package stored as a ZIP archive.
///
/// Pairs the opened archive with the content-type table parsed from its
/// `[Content_Types].xml` entry.
pub struct Package<R> {
    /// ZIP archive that part data is read from.
    archive: ZipArchive<R>,
    /// Content types parsed from `[Content_Types].xml` when the package was opened.
    content_types: ContentTypes,
}
25
26impl<R: Read + Seek> Package<R> {
27 pub fn open(reader: R) -> Result<Self> {
29 let mut archive = ZipArchive::new(reader)?;
30
31 let content_types = Self::read_content_types(&mut archive)?;
33
34 Ok(Self {
35 archive,
36 content_types,
37 })
38 }
39
40 fn read_content_types(archive: &mut ZipArchive<R>) -> Result<ContentTypes> {
42 let file = archive
43 .by_name("[Content_Types].xml")
44 .map_err(|_| Error::MissingPart("[Content_Types].xml".into()))?;
45
46 ContentTypes::parse(file)
47 }
48
49 pub fn content_types(&self) -> &ContentTypes {
51 &self.content_types
52 }
53
54 pub fn has_part(&self, path: &str) -> bool {
56 self.archive.file_names().any(|name| name == path)
57 }
58
59 pub fn parts(&self) -> impl Iterator<Item = &str> {
61 self.archive.file_names()
62 }
63
64 pub fn read_part(&mut self, path: &str) -> Result<Vec<u8>> {
66 let mut file = self
67 .archive
68 .by_name(path)
69 .map_err(|_| Error::MissingPart(path.into()))?;
70
71 let mut contents = Vec::new();
72 file.read_to_end(&mut contents)?;
73 Ok(contents)
74 }
75
76 pub fn read_part_string(&mut self, path: &str) -> Result<String> {
78 let bytes = self.read_part(path)?;
79 String::from_utf8(bytes)
80 .map_err(|e| Error::Invalid(format!("invalid UTF-8 in {}: {}", path, e)))
81 }
82
83 pub fn content_type(&self, path: &str) -> Option<&str> {
85 self.content_types.get(path)
86 }
87
88 pub fn read_relationships(&mut self) -> Result<crate::relationships::Relationships> {
90 self.read_part_relationships("")
91 }
92
93 pub fn copy_to_writer<W: Write + Seek>(
103 &mut self,
104 writer: &mut PackageWriter<W>,
105 replacements: &HashMap<&str, &[u8]>,
106 ) -> Result<()> {
107 for (ext, ct) in self.content_types.defaults() {
109 writer.add_default_content_type(ext, ct);
110 }
111
112 let parts_info: Vec<(String, String)> = self
114 .parts()
115 .filter(|name| *name != "[Content_Types].xml")
116 .map(|name| {
117 let ct = self
118 .content_types
119 .get(name)
120 .unwrap_or("application/octet-stream")
121 .to_string();
122 (name.to_string(), ct)
123 })
124 .collect();
125
126 for (name, ct) in &parts_info {
128 let data = if let Some(replacement) = replacements.get(name.as_str()) {
129 replacement.to_vec()
130 } else {
131 self.read_part(name)?
132 };
133 writer.add_part(name, ct, &data)?;
134 }
135
136 Ok(())
137 }
138
139 pub fn read_part_relationships(
141 &mut self,
142 part_path: &str,
143 ) -> Result<crate::relationships::Relationships> {
144 let rels_path = crate::relationships::rels_path_for(part_path);
145
146 if !self.has_part(&rels_path) {
147 return Ok(crate::relationships::Relationships::new());
148 }
149
150 let data = self.read_part(&rels_path)?;
151 crate::relationships::Relationships::parse(&data[..])
152 }
153}
154
/// Builds a package by writing parts into a ZIP archive.
///
/// Content types are accumulated while parts are added and are written out
/// as `[Content_Types].xml` by `finish`.
pub struct PackageWriter<W: Write + Seek> {
    /// ZIP writer that receives the part data.
    writer: ZipWriter<W>,
    /// Content types collected for the parts written so far.
    content_types: ContentTypes,
}
160
161impl<W: Write + Seek> PackageWriter<W> {
162 pub fn new(writer: W) -> Self {
164 Self {
165 writer: ZipWriter::new(writer),
166 content_types: ContentTypes::new(),
167 }
168 }
169
170 pub fn add_part(&mut self, path: &str, content_type: &str, data: &[u8]) -> Result<()> {
172 self.content_types.add_override(path, content_type);
174
175 let options = zip::write::SimpleFileOptions::default()
177 .compression_method(zip::CompressionMethod::Deflated);
178 self.writer.start_file(path, options)?;
179 self.writer.write_all(data)?;
180
181 Ok(())
182 }
183
184 pub fn add_default_content_type(&mut self, extension: &str, content_type: &str) {
186 self.content_types.add_default(extension, content_type);
187 }
188
189 pub fn finish(mut self) -> Result<W> {
191 let content_types_xml = self.content_types.serialize();
193 let options = zip::write::SimpleFileOptions::default()
194 .compression_method(zip::CompressionMethod::Deflated);
195 self.writer.start_file("[Content_Types].xml", options)?;
196 self.writer.write_all(content_types_xml.as_bytes())?;
197
198 Ok(self.writer.finish()?)
199 }
200}
201
/// In-memory form of a package's `[Content_Types].xml`.
#[derive(Debug, Clone, Default)]
pub struct ContentTypes {
    /// Default content types, keyed by file extension.
    defaults: HashMap<String, String>,
    /// Per-part content types, keyed by part name. `parse` and `add_override`
    /// store the name without its leading `/`.
    overrides: HashMap<String, String>,
}
212
213impl ContentTypes {
214 pub fn new() -> Self {
216 Self::default()
217 }
218
219 pub fn parse<R: Read>(reader: R) -> Result<Self> {
221 use quick_xml::Reader;
222 use quick_xml::events::Event;
223
224 let mut xml = Reader::from_reader(std::io::BufReader::new(reader));
225 xml.config_mut().trim_text(true);
226
227 let mut content_types = Self::new();
228 let mut buf = Vec::new();
229
230 loop {
231 match xml.read_event_into(&mut buf) {
232 Ok(Event::Empty(e)) => {
233 let name = e.name();
234 if name.as_ref() == b"Default" {
235 let mut extension = None;
236 let mut content_type = None;
237
238 for attr in e.attributes().filter_map(|a| a.ok()) {
239 match attr.key.as_ref() {
240 b"Extension" => {
241 extension =
242 Some(String::from_utf8_lossy(&attr.value).into_owned());
243 }
244 b"ContentType" => {
245 content_type =
246 Some(String::from_utf8_lossy(&attr.value).into_owned());
247 }
248 _ => {}
249 }
250 }
251
252 if let (Some(ext), Some(ct)) = (extension, content_type) {
253 content_types.defaults.insert(ext, ct);
254 }
255 } else if name.as_ref() == b"Override" {
256 let mut part_name = None;
257 let mut content_type = None;
258
259 for attr in e.attributes().filter_map(|a| a.ok()) {
260 match attr.key.as_ref() {
261 b"PartName" => {
262 part_name =
263 Some(String::from_utf8_lossy(&attr.value).into_owned());
264 }
265 b"ContentType" => {
266 content_type =
267 Some(String::from_utf8_lossy(&attr.value).into_owned());
268 }
269 _ => {}
270 }
271 }
272
273 if let (Some(pn), Some(ct)) = (part_name, content_type) {
274 let normalized = pn.strip_prefix('/').unwrap_or(&pn);
276 content_types.overrides.insert(normalized.to_string(), ct);
277 }
278 }
279 }
280 Ok(Event::Eof) => break,
281 Err(e) => return Err(Error::Xml(e)),
282 _ => {}
283 }
284 buf.clear();
285 }
286
287 Ok(content_types)
288 }
289
290 pub fn serialize(&self) -> String {
292 let mut xml = String::from(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#);
293 xml.push('\n');
294 xml.push_str(
295 r#"<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">"#,
296 );
297
298 for (ext, ct) in &self.defaults {
299 xml.push_str(&format!(
300 r#"<Default Extension="{}" ContentType="{}"/>"#,
301 ext, ct
302 ));
303 }
304
305 for (part, ct) in &self.overrides {
306 let part_name = if part.starts_with('/') {
308 part.clone()
309 } else {
310 format!("/{}", part)
311 };
312 xml.push_str(&format!(
313 r#"<Override PartName="{}" ContentType="{}"/>"#,
314 part_name, ct
315 ));
316 }
317
318 xml.push_str("</Types>");
319 xml
320 }
321
322 pub fn add_default(&mut self, extension: &str, content_type: &str) {
324 self.defaults
325 .insert(extension.to_string(), content_type.to_string());
326 }
327
328 pub fn add_override(&mut self, part_name: &str, content_type: &str) {
330 let normalized = part_name.strip_prefix('/').unwrap_or(part_name);
331 self.overrides
332 .insert(normalized.to_string(), content_type.to_string());
333 }
334
335 pub fn get(&self, part_name: &str) -> Option<&str> {
337 let normalized = part_name.strip_prefix('/').unwrap_or(part_name);
338
339 if let Some(ct) = self.overrides.get(normalized) {
341 return Some(ct);
342 }
343
344 if let Some(ext) = normalized.rsplit('.').next()
346 && let Some(ct) = self.defaults.get(ext)
347 {
348 return Some(ct);
349 }
350
351 None
352 }
353
354 pub fn defaults(&self) -> impl Iterator<Item = (&str, &str)> {
356 self.defaults.iter().map(|(k, v)| (k.as_str(), v.as_str()))
357 }
358
359 pub fn overrides(&self) -> impl Iterator<Item = (&str, &str)> {
361 self.overrides.iter().map(|(k, v)| (k.as_str(), v.as_str()))
362 }
363}
364
/// Content-type strings used by this crate, grouped by where they apply.
pub mod content_type {
    // ---- Generic / package-level ----

    /// Plain XML.
    pub const XML: &str = "application/xml";

    /// Relationship parts (`.rels`).
    pub const RELATIONSHIPS: &str = "application/vnd.openxmlformats-package.relationships+xml";

    /// Core properties part.
    pub const CORE_PROPERTIES: &str = "application/vnd.openxmlformats-package.core-properties+xml";

    /// Extended properties part.
    pub const EXTENDED_PROPERTIES: &str =
        "application/vnd.openxmlformats-officedocument.extended-properties+xml";

    // ---- WordprocessingML ----

    /// Main document part.
    pub const WORDPROCESSING_DOCUMENT: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";

    /// Styles part.
    pub const WORDPROCESSING_STYLES: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";

    /// Numbering definitions part.
    pub const WORDPROCESSING_NUMBERING: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml";

    /// Header part.
    pub const WORDPROCESSING_HEADER: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";

    /// Footer part.
    pub const WORDPROCESSING_FOOTER: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";

    /// Footnotes part.
    pub const WORDPROCESSING_FOOTNOTES: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml";

    /// Endnotes part.
    pub const WORDPROCESSING_ENDNOTES: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml";

    /// Comments part.
    pub const WORDPROCESSING_COMMENTS: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml";
}
412
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Name of the main document part used by the fixtures.
    const DOCUMENT_PART: &str = "word/document.xml";

    /// Name of the package-level relationships part used by the fixtures.
    const ROOT_RELS_PART: &str = "_rels/.rels";

    /// Minimal WordprocessingML document containing the text "Hello!".
    const DOCUMENT_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
 <w:body>
 <w:p><w:r><w:t>Hello!</w:t></w:r></w:p>
 </w:body>
</w:document>"#;

    /// Package-level relationships pointing at the main document part.
    const ROOT_RELS_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
 <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
</Relationships>"#;

    /// Builds an in-memory package holding one document part and the
    /// package-level relationships, and returns the raw ZIP bytes.
    fn create_test_package() -> Vec<u8> {
        let mut zip_bytes = Cursor::new(Vec::new());
        let mut package = PackageWriter::new(&mut zip_bytes);

        package.add_default_content_type("rels", content_type::RELATIONSHIPS);
        package.add_default_content_type("xml", content_type::XML);

        package
            .add_part(
                DOCUMENT_PART,
                content_type::WORDPROCESSING_DOCUMENT,
                DOCUMENT_XML.as_bytes(),
            )
            .unwrap();
        package
            .add_part(
                ROOT_RELS_PART,
                content_type::RELATIONSHIPS,
                ROOT_RELS_XML.as_bytes(),
            )
            .unwrap();

        // `finish` consumes the writer, which releases the borrow of `zip_bytes`.
        package.finish().unwrap();

        zip_bytes.into_inner()
    }

    /// A package produced by `PackageWriter` can be reopened and exposes the
    /// expected content types, parts, part data and relationships.
    #[test]
    fn test_create_and_read_package() {
        let mut pkg = Package::open(Cursor::new(create_test_package())).unwrap();

        // Content types recorded while writing are visible after reopening.
        assert_eq!(
            pkg.content_type(DOCUMENT_PART),
            Some(content_type::WORDPROCESSING_DOCUMENT)
        );
        assert_eq!(
            pkg.content_type(ROOT_RELS_PART),
            Some(content_type::RELATIONSHIPS)
        );

        // Every written entry, plus the generated content types, is present.
        for expected in [DOCUMENT_PART, ROOT_RELS_PART, "[Content_Types].xml"] {
            assert!(pkg.has_part(expected));
        }

        let document_text = pkg.read_part_string(DOCUMENT_PART).unwrap();
        assert!(document_text.contains("Hello!"));

        let relationships = pkg.read_relationships().unwrap();
        assert_eq!(relationships.len(), 1);

        let office_document = relationships
            .get_by_type(crate::relationships::rel_type::OFFICE_DOCUMENT)
            .unwrap();
        assert_eq!(office_document.target, "word/document.xml");
    }

    /// Serializing a table and parsing it back preserves defaults and
    /// overrides.
    #[test]
    fn test_content_types_roundtrip() {
        let mut original = ContentTypes::new();
        original.add_default("xml", "application/xml");
        original.add_default("rels", content_type::RELATIONSHIPS);
        original.add_override("/word/document.xml", content_type::WORDPROCESSING_DOCUMENT);

        let serialized = original.serialize();
        let reparsed = ContentTypes::parse(serialized.as_bytes()).unwrap();

        assert_eq!(reparsed.get("foo.xml"), Some("application/xml"));
        assert_eq!(
            reparsed.get("_rels/.rels"),
            Some(content_type::RELATIONSHIPS)
        );
        assert_eq!(
            reparsed.get("word/document.xml"),
            Some(content_type::WORDPROCESSING_DOCUMENT)
        );
    }
}