Skip to main content

ooxml_opc/
packaging.rs

1//! OPC (Open Packaging Conventions) implementation.
2//!
3//! OOXML files are ZIP archives following the OPC specification (ECMA-376 Part 2).
4//! This module handles reading and writing these packages.
5//!
6//! # Structure
7//!
8//! An OPC package contains:
9//! - `[Content_Types].xml` - MIME type mappings for parts
10//! - `_rels/.rels` - Package-level relationships
11//! - Various parts (XML files, images, etc.)
12//! - Part-specific relationships in `*/_rels/*.rels`
13
14use crate::error::{Error, Result};
15use std::collections::HashMap;
16use std::io::{Read, Seek, Write};
17use zip::read::ZipArchive;
18use zip::write::ZipWriter;
19
20/// An OPC package (ZIP-based container for OOXML files).
21pub struct Package<R> {
22    archive: ZipArchive<R>,
23    content_types: ContentTypes,
24}
25
26impl<R: Read + Seek> Package<R> {
27    /// Open an OPC package from a reader.
28    pub fn open(reader: R) -> Result<Self> {
29        let mut archive = ZipArchive::new(reader)?;
30
31        // Parse [Content_Types].xml (required)
32        let content_types = Self::read_content_types(&mut archive)?;
33
34        Ok(Self {
35            archive,
36            content_types,
37        })
38    }
39
40    /// Read [Content_Types].xml from the archive.
41    fn read_content_types(archive: &mut ZipArchive<R>) -> Result<ContentTypes> {
42        let file = archive
43            .by_name("[Content_Types].xml")
44            .map_err(|_| Error::MissingPart("[Content_Types].xml".into()))?;
45
46        ContentTypes::parse(file)
47    }
48
49    /// Get the content types for this package.
50    pub fn content_types(&self) -> &ContentTypes {
51        &self.content_types
52    }
53
54    /// Check if a part exists in the package.
55    pub fn has_part(&self, path: &str) -> bool {
56        self.archive.file_names().any(|name| name == path)
57    }
58
59    /// List all parts in the package.
60    pub fn parts(&self) -> impl Iterator<Item = &str> {
61        self.archive.file_names()
62    }
63
64    /// Read a part's contents as bytes.
65    pub fn read_part(&mut self, path: &str) -> Result<Vec<u8>> {
66        let mut file = self
67            .archive
68            .by_name(path)
69            .map_err(|_| Error::MissingPart(path.into()))?;
70
71        let mut contents = Vec::new();
72        file.read_to_end(&mut contents)?;
73        Ok(contents)
74    }
75
76    /// Read a part's contents as a string.
77    pub fn read_part_string(&mut self, path: &str) -> Result<String> {
78        let bytes = self.read_part(path)?;
79        String::from_utf8(bytes)
80            .map_err(|e| Error::Invalid(format!("invalid UTF-8 in {}: {}", path, e)))
81    }
82
83    /// Get the content type for a part.
84    pub fn content_type(&self, path: &str) -> Option<&str> {
85        self.content_types.get(path)
86    }
87
88    /// Read package-level relationships (_rels/.rels).
89    pub fn read_relationships(&mut self) -> Result<crate::relationships::Relationships> {
90        self.read_part_relationships("")
91    }
92
93    /// Copy all parts to a writer, replacing specific parts with new content.
94    ///
95    /// This is the core mechanism for roundtrip preservation: it copies every part
96    /// from the source package verbatim, except for parts that have replacement
97    /// bytes provided. `[Content_Types].xml` is skipped since `PackageWriter::finish()`
98    /// regenerates it.
99    ///
100    /// Both default and override content types from the original package are
101    /// transferred to the writer.
102    pub fn copy_to_writer<W: Write + Seek>(
103        &mut self,
104        writer: &mut PackageWriter<W>,
105        replacements: &HashMap<&str, &[u8]>,
106    ) -> Result<()> {
107        // Transfer all default content types from original package
108        for (ext, ct) in self.content_types.defaults() {
109            writer.add_default_content_type(ext, ct);
110        }
111
112        // Collect part names and their content types (excluding [Content_Types].xml)
113        let parts_info: Vec<(String, String)> = self
114            .parts()
115            .filter(|name| *name != "[Content_Types].xml")
116            .map(|name| {
117                let ct = self
118                    .content_types
119                    .get(name)
120                    .unwrap_or("application/octet-stream")
121                    .to_string();
122                (name.to_string(), ct)
123            })
124            .collect();
125
126        // Copy each part, using replacement bytes when provided
127        for (name, ct) in &parts_info {
128            let data = if let Some(replacement) = replacements.get(name.as_str()) {
129                replacement.to_vec()
130            } else {
131                self.read_part(name)?
132            };
133            writer.add_part(name, ct, &data)?;
134        }
135
136        Ok(())
137    }
138
139    /// Read relationships for a specific part.
140    pub fn read_part_relationships(
141        &mut self,
142        part_path: &str,
143    ) -> Result<crate::relationships::Relationships> {
144        let rels_path = crate::relationships::rels_path_for(part_path);
145
146        if !self.has_part(&rels_path) {
147            return Ok(crate::relationships::Relationships::new());
148        }
149
150        let data = self.read_part(&rels_path)?;
151        crate::relationships::Relationships::parse(&data[..])
152    }
153}
154
155/// Builder for creating new OPC packages.
156pub struct PackageWriter<W: Write + Seek> {
157    writer: ZipWriter<W>,
158    content_types: ContentTypes,
159}
160
161impl<W: Write + Seek> PackageWriter<W> {
162    /// Create a new package writer.
163    pub fn new(writer: W) -> Self {
164        Self {
165            writer: ZipWriter::new(writer),
166            content_types: ContentTypes::new(),
167        }
168    }
169
170    /// Add a part to the package.
171    pub fn add_part(&mut self, path: &str, content_type: &str, data: &[u8]) -> Result<()> {
172        // Register content type
173        self.content_types.add_override(path, content_type);
174
175        // Write to ZIP
176        let options = zip::write::SimpleFileOptions::default()
177            .compression_method(zip::CompressionMethod::Deflated);
178        self.writer.start_file(path, options)?;
179        self.writer.write_all(data)?;
180
181        Ok(())
182    }
183
184    /// Add a default content type mapping for a file extension.
185    pub fn add_default_content_type(&mut self, extension: &str, content_type: &str) {
186        self.content_types.add_default(extension, content_type);
187    }
188
189    /// Finish writing the package.
190    pub fn finish(mut self) -> Result<W> {
191        // Write [Content_Types].xml
192        let content_types_xml = self.content_types.serialize();
193        let options = zip::write::SimpleFileOptions::default()
194            .compression_method(zip::CompressionMethod::Deflated);
195        self.writer.start_file("[Content_Types].xml", options)?;
196        self.writer.write_all(content_types_xml.as_bytes())?;
197
198        Ok(self.writer.finish()?)
199    }
200}
201
202/// Content type mappings for package parts.
203///
204/// Maps file extensions and specific part names to MIME types.
205#[derive(Debug, Clone, Default)]
206pub struct ContentTypes {
207    /// Default mappings by extension (e.g., "xml" -> "application/xml").
208    defaults: HashMap<String, String>,
209    /// Override mappings for specific parts (e.g., "/word/document.xml" -> "...").
210    overrides: HashMap<String, String>,
211}
212
213impl ContentTypes {
214    /// Create empty content types.
215    pub fn new() -> Self {
216        Self::default()
217    }
218
219    /// Parse content types from XML.
220    pub fn parse<R: Read>(reader: R) -> Result<Self> {
221        use quick_xml::Reader;
222        use quick_xml::events::Event;
223
224        let mut xml = Reader::from_reader(std::io::BufReader::new(reader));
225        xml.config_mut().trim_text(true);
226
227        let mut content_types = Self::new();
228        let mut buf = Vec::new();
229
230        loop {
231            match xml.read_event_into(&mut buf) {
232                Ok(Event::Empty(e)) => {
233                    let name = e.name();
234                    if name.as_ref() == b"Default" {
235                        let mut extension = None;
236                        let mut content_type = None;
237
238                        for attr in e.attributes().filter_map(|a| a.ok()) {
239                            match attr.key.as_ref() {
240                                b"Extension" => {
241                                    extension =
242                                        Some(String::from_utf8_lossy(&attr.value).into_owned());
243                                }
244                                b"ContentType" => {
245                                    content_type =
246                                        Some(String::from_utf8_lossy(&attr.value).into_owned());
247                                }
248                                _ => {}
249                            }
250                        }
251
252                        if let (Some(ext), Some(ct)) = (extension, content_type) {
253                            content_types.defaults.insert(ext, ct);
254                        }
255                    } else if name.as_ref() == b"Override" {
256                        let mut part_name = None;
257                        let mut content_type = None;
258
259                        for attr in e.attributes().filter_map(|a| a.ok()) {
260                            match attr.key.as_ref() {
261                                b"PartName" => {
262                                    part_name =
263                                        Some(String::from_utf8_lossy(&attr.value).into_owned());
264                                }
265                                b"ContentType" => {
266                                    content_type =
267                                        Some(String::from_utf8_lossy(&attr.value).into_owned());
268                                }
269                                _ => {}
270                            }
271                        }
272
273                        if let (Some(pn), Some(ct)) = (part_name, content_type) {
274                            // Normalize path (remove leading /)
275                            let normalized = pn.strip_prefix('/').unwrap_or(&pn);
276                            content_types.overrides.insert(normalized.to_string(), ct);
277                        }
278                    }
279                }
280                Ok(Event::Eof) => break,
281                Err(e) => return Err(Error::Xml(e)),
282                _ => {}
283            }
284            buf.clear();
285        }
286
287        Ok(content_types)
288    }
289
290    /// Serialize content types to XML.
291    pub fn serialize(&self) -> String {
292        let mut xml = String::from(r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#);
293        xml.push('\n');
294        xml.push_str(
295            r#"<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">"#,
296        );
297
298        for (ext, ct) in &self.defaults {
299            xml.push_str(&format!(
300                r#"<Default Extension="{}" ContentType="{}"/>"#,
301                ext, ct
302            ));
303        }
304
305        for (part, ct) in &self.overrides {
306            // Ensure leading /
307            let part_name = if part.starts_with('/') {
308                part.clone()
309            } else {
310                format!("/{}", part)
311            };
312            xml.push_str(&format!(
313                r#"<Override PartName="{}" ContentType="{}"/>"#,
314                part_name, ct
315            ));
316        }
317
318        xml.push_str("</Types>");
319        xml
320    }
321
322    /// Add a default content type mapping.
323    pub fn add_default(&mut self, extension: &str, content_type: &str) {
324        self.defaults
325            .insert(extension.to_string(), content_type.to_string());
326    }
327
328    /// Add an override content type mapping.
329    pub fn add_override(&mut self, part_name: &str, content_type: &str) {
330        let normalized = part_name.strip_prefix('/').unwrap_or(part_name);
331        self.overrides
332            .insert(normalized.to_string(), content_type.to_string());
333    }
334
335    /// Get the content type for a part.
336    pub fn get(&self, part_name: &str) -> Option<&str> {
337        let normalized = part_name.strip_prefix('/').unwrap_or(part_name);
338
339        // Check overrides first
340        if let Some(ct) = self.overrides.get(normalized) {
341            return Some(ct);
342        }
343
344        // Fall back to default by extension
345        if let Some(ext) = normalized.rsplit('.').next()
346            && let Some(ct) = self.defaults.get(ext)
347        {
348            return Some(ct);
349        }
350
351        None
352    }
353
354    /// Iterate over default mappings.
355    pub fn defaults(&self) -> impl Iterator<Item = (&str, &str)> {
356        self.defaults.iter().map(|(k, v)| (k.as_str(), v.as_str()))
357    }
358
359    /// Iterate over override mappings.
360    pub fn overrides(&self) -> impl Iterator<Item = (&str, &str)> {
361        self.overrides.iter().map(|(k, v)| (k.as_str(), v.as_str()))
362    }
363}
364
/// Well-known MIME type strings for parts found in OOXML packages.
pub mod content_type {
    /// Content type of `.rels` relationship parts.
    pub const RELATIONSHIPS: &str = "application/vnd.openxmlformats-package.relationships+xml";

    /// Generic XML.
    pub const XML: &str = "application/xml";

    /// Main document part of a WordprocessingML package.
    pub const WORDPROCESSING_DOCUMENT: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";

    /// Styles part of a WordprocessingML package.
    pub const WORDPROCESSING_STYLES: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";

    /// Numbering definitions part of a WordprocessingML package.
    pub const WORDPROCESSING_NUMBERING: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml";

    /// Header part of a WordprocessingML package.
    pub const WORDPROCESSING_HEADER: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";

    /// Footer part of a WordprocessingML package.
    pub const WORDPROCESSING_FOOTER: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";

    /// Footnotes part of a WordprocessingML package.
    pub const WORDPROCESSING_FOOTNOTES: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml";

    /// Endnotes part of a WordprocessingML package.
    pub const WORDPROCESSING_ENDNOTES: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml";

    /// Comments part of a WordprocessingML package.
    pub const WORDPROCESSING_COMMENTS: &str =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml";

    /// Package core properties (Dublin Core metadata).
    pub const CORE_PROPERTIES: &str = "application/vnd.openxmlformats-package.core-properties+xml";

    /// Extended (application-specific) properties.
    pub const EXTENDED_PROPERTIES: &str =
        "application/vnd.openxmlformats-officedocument.extended-properties+xml";
}
412
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Minimal main document used by the fixture package.
    const DOCUMENT_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
  <w:body>
    <w:p><w:r><w:t>Hello!</w:t></w:r></w:p>
  </w:body>
</w:document>"#;

    /// Package-level relationships pointing at the main document.
    const ROOT_RELS_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
</Relationships>"#;

    /// Build an in-memory package with one document part and root relationships.
    fn create_test_package() -> Vec<u8> {
        let mut sink = Cursor::new(Vec::new());

        let mut package = PackageWriter::new(&mut sink);

        // Default content types by extension.
        package.add_default_content_type("rels", content_type::RELATIONSHIPS);
        package.add_default_content_type("xml", content_type::XML);

        // Main document part.
        package
            .add_part(
                "word/document.xml",
                content_type::WORDPROCESSING_DOCUMENT,
                DOCUMENT_XML.as_bytes(),
            )
            .unwrap();

        // Package-level relationships part.
        package
            .add_part(
                "_rels/.rels",
                content_type::RELATIONSHIPS,
                ROOT_RELS_XML.as_bytes(),
            )
            .unwrap();

        // `finish` consumes the writer, releasing its borrow of `sink`.
        package.finish().unwrap();

        sink.into_inner()
    }

    #[test]
    fn test_create_and_read_package() {
        let bytes = create_test_package();
        let mut pkg = Package::open(Cursor::new(bytes)).unwrap();

        // Content types resolve for both parts.
        assert_eq!(
            pkg.content_type("word/document.xml"),
            Some(content_type::WORDPROCESSING_DOCUMENT)
        );
        assert_eq!(
            pkg.content_type("_rels/.rels"),
            Some(content_type::RELATIONSHIPS)
        );

        // Every expected entry is present.
        for expected in ["word/document.xml", "_rels/.rels", "[Content_Types].xml"] {
            assert!(pkg.has_part(expected));
        }

        // The document body survives the roundtrip.
        let body = pkg.read_part_string("word/document.xml").unwrap();
        assert!(body.contains("Hello!"));

        // Root relationships contain exactly the office-document link.
        let relationships = pkg.read_relationships().unwrap();
        assert_eq!(relationships.len(), 1);

        let main_doc = relationships
            .get_by_type(crate::relationships::rel_type::OFFICE_DOCUMENT)
            .unwrap();
        assert_eq!(main_doc.target, "word/document.xml");
    }

    #[test]
    fn test_content_types_roundtrip() {
        let mut original = ContentTypes::new();
        original.add_default("xml", "application/xml");
        original.add_default("rels", content_type::RELATIONSHIPS);
        original.add_override("/word/document.xml", content_type::WORDPROCESSING_DOCUMENT);

        let serialized = original.serialize();
        let reparsed = ContentTypes::parse(serialized.as_bytes()).unwrap();

        assert_eq!(reparsed.get("foo.xml"), Some("application/xml"));
        assert_eq!(
            reparsed.get("_rels/.rels"),
            Some(content_type::RELATIONSHIPS)
        );
        assert_eq!(
            reparsed.get("word/document.xml"),
            Some(content_type::WORDPROCESSING_DOCUMENT)
        );
    }
}