use std::collections::HashMap;
use std::io::Read;
use docspec_core::Error;
use quick_xml::events::Event;
use quick_xml::XmlVersion;
/// Content-type table read from a package's `[Content_Types].xml` part.
///
/// The derived [`Default`] produces an empty table, for which every lookup
/// returns `None`.
#[derive(Debug, Default)]
pub(crate) struct ContentTypes {
    /// `Default` declarations: ASCII-lowercased extension (without the dot)
    /// mapped to its content type.
    defaults: HashMap<String, String>,
    /// `Override` declarations: part name with a single leading `/` removed,
    /// mapped to its content type.
    overrides: HashMap<String, String>,
}
/// Parses the bytes of a `[Content_Types].xml` part into a [`ContentTypes`] table.
///
/// Empty input is accepted and yields an empty table. Every start tag and
/// every self-closing tag is handed to `process_element`; all other events
/// are ignored apart from the nesting bookkeeping described below.
///
/// # Errors
///
/// Returns a parse error when the XML reader reports an error, when an end
/// tag is seen while no element is open, when the input ends with elements
/// still open, or when `process_element` fails.
pub(crate) fn parse(xml: &[u8]) -> docspec_core::Result<ContentTypes> {
    let mut table = ContentTypes::default();
    if xml.is_empty() {
        return Ok(table);
    }

    let mut reader = quick_xml::Reader::from_reader(xml);
    let mut scratch = Vec::new();
    // Number of start tags that have not yet been matched by an end tag.
    let mut open_elements: usize = 0;

    loop {
        match reader
            .read_event_into(&mut scratch)
            .map_err(|err| parse_error(format!("malformed [Content_Types].xml: {err}")))?
        {
            Event::Start(element) => {
                open_elements = open_elements.saturating_add(1);
                process_element(&reader, &element, &mut table)?;
            }
            // Self-closing tags never open a nesting level.
            Event::Empty(element) => process_element(&reader, &element, &mut table)?,
            Event::End(_) => {
                // An end tag with nothing open would underflow the counter.
                open_elements = open_elements
                    .checked_sub(1)
                    .ok_or_else(|| parse_error("malformed [Content_Types].xml".to_string()))?;
            }
            Event::Eof => {
                return if open_elements == 0 {
                    Ok(table)
                } else {
                    Err(parse_error("malformed [Content_Types].xml".to_string()))
                };
            }
            _ => {}
        }
        // The event borrowed from `scratch` is gone; reuse the allocation.
        scratch.clear();
    }
}
impl ContentTypes {
    /// Resolves the content type declared for `part_path`.
    ///
    /// `part_path` is used as given: a leading `/` is not stripped, whereas
    /// stored `Override` names have theirs removed at parse time.
    ///
    /// Resolution order:
    /// 1. an `Override` whose stored name equals `part_path` exactly;
    /// 2. an `Override` whose stored name equals `part_path` ignoring ASCII
    ///    case (OPC treats part names as case-insensitive ASCII);
    /// 3. the `Default` registered for the extension of the last path
    ///    segment, compared ASCII-case-insensitively.
    ///
    /// Returns `None` when no override matches and the last segment has no
    /// extension, an empty extension (`"name."`), or an unregistered one.
    #[inline]
    #[must_use]
    pub(crate) fn lookup<'a>(&'a self, part_path: &str) -> Option<&'a str> {
        // Fast path: one hash probe covers the common exact-case hit.
        if let Some(exact) = self.overrides.get(part_path) {
            return Some(exact.as_str());
        }

        // Case-insensitive fallback. Linear in the number of overrides, and
        // only reached when the exact probe misses. Should several stored
        // names differ only by case, the smallest key wins so the result
        // does not depend on hash-map iteration order.
        let folded = self
            .overrides
            .iter()
            .filter(|(name, _)| name.eq_ignore_ascii_case(part_path))
            .min_by(|left, right| left.0.cmp(right.0))
            .map(|(_, content_type)| content_type.as_str());
        if folded.is_some() {
            return folded;
        }

        // Only the final path segment may contribute an extension, so a dot
        // in a directory name is never mistaken for one.
        let file_name = part_path.rsplit('/').next().unwrap_or(part_path);
        let (_, ext) = file_name.rsplit_once('.')?;
        if ext.is_empty() {
            return None;
        }
        self.defaults
            .get(&ext.to_ascii_lowercase())
            .map(String::as_str)
    }
}
/// Records the `Default` or `Override` declaration carried by `element` in `ct`.
///
/// Elements are recognised by local name; anything else is ignored. A
/// declaration that lacks either of its two attributes is skipped without
/// error. `Default` extensions are stored ASCII-lowercased, and `Override`
/// part names are stored with one leading `/` removed.
///
/// # Errors
///
/// Propagates the error from `attr_string` when an attribute cannot be read.
fn process_element<R: Read>(
    reader: &quick_xml::Reader<R>,
    element: &quick_xml::events::BytesStart<'_>,
    ct: &mut ContentTypes,
) -> docspec_core::Result<()> {
    let tag = element.local_name();
    match tag.as_ref() {
        b"Default" => {
            let extension = attr_string(reader, element, b"Extension")?;
            let content_type = attr_string(reader, element, b"ContentType")?;
            if let Some((extension, content_type)) = extension.zip(content_type) {
                ct.defaults
                    .insert(extension.to_ascii_lowercase(), content_type);
            }
        }
        b"Override" => {
            let part_name = attr_string(reader, element, b"PartName")?;
            let content_type = attr_string(reader, element, b"ContentType")?;
            if let Some((part_name, content_type)) = part_name.zip(content_type) {
                // Keys are kept relative so lookups can use paths without a leading slash.
                let relative: &str = part_name.strip_prefix('/').unwrap_or(&part_name);
                ct.overrides.insert(relative.to_owned(), content_type);
            }
        }
        _ => {}
    }
    Ok(())
}
fn attr_string<R: Read>(
reader: &quick_xml::Reader<R>,
element: &quick_xml::events::BytesStart<'_>,
name: &[u8],
) -> docspec_core::Result<Option<String>> {
for attribute_result in element.attributes() {
let attribute = attribute_result
.map_err(|err| parse_error(format!("malformed [Content_Types].xml: {err}")))?;
if attribute.key.local_name().as_ref() == name {
return attribute
.decoded_and_normalized_value(XmlVersion::Implicit1_0, reader.decoder())
.map(|value| Some(value.into_owned()))
.map_err(|err| parse_error(format!("malformed [Content_Types].xml: {err}")));
}
}
Ok(None)
}
fn parse_error(message: String) -> Error {
Error::Parse {
message,
position: None,
}
}
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used, clippy::expect_used)]
    use super::*;

    /// Parses a fixture that the calling test expects to be well-formed.
    fn parsed(fixture: &[u8]) -> ContentTypes {
        parse(fixture).expect("parse should succeed")
    }

    /// Checks `lookup` against every `(part path, expected content type)` pair.
    fn assert_lookups(table: &ContentTypes, expectations: &[(&str, Option<&str>)]) {
        for &(part_path, expected) in expectations {
            assert_eq!(table.lookup(part_path), expected);
        }
    }

    /// A `Default` declaration resolves a part by its extension.
    #[test]
    fn default_extension_lookup() {
        let fixture = br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="png" ContentType="image/png"/>
</Types>"#;
        let table = parsed(fixture);
        assert_lookups(&table, &[("word/media/image1.png", Some("image/png"))]);
    }

    /// An `Override` takes precedence over the `Default` for the same extension.
    #[test]
    fn override_beats_default() {
        let fixture = br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
</Types>"#;
        let table = parsed(fixture);
        assert_lookups(
            &table,
            &[(
                "word/document.xml",
                Some(
                    "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
                ),
            )],
        );
    }

    /// Empty input parses to a table that resolves nothing.
    #[test]
    fn default_when_empty() {
        let table = parse(&[]).expect("parse of empty input should succeed");
        assert_lookups(
            &table,
            &[("word/media/image1.png", None), ("word/document.xml", None)],
        );
    }

    /// Extension case is ignored both in the declaration and in the lookup path.
    #[test]
    fn extension_lookup_is_case_insensitive() {
        let fixture = br#"<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="PNG" ContentType="image/png"/>
</Types>"#;
        let table = parsed(fixture);
        assert_lookups(
            &table,
            &[
                ("word/media/photo.png", Some("image/png")),
                ("word/media/photo.PNG", Some("image/png")),
            ],
        );
    }

    /// An extension with no `Default` declaration resolves to nothing.
    #[test]
    fn lookup_returns_none_for_unknown_extension() {
        let fixture = br#"<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="png" ContentType="image/png"/>
</Types>"#;
        let table = parsed(fixture);
        assert_lookups(&table, &[("word/media/file.docx", None)]);
    }

    /// Override keys are stored without the leading slash, and lookups are not normalised.
    #[test]
    fn override_strips_leading_slash_from_part_name() {
        let fixture = br#"<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Override PartName="/word/document.xml" ContentType="application/vnd.docx"/>
</Types>"#;
        let table = parsed(fixture);
        assert_lookups(
            &table,
            &[
                ("word/document.xml", Some("application/vnd.docx")),
                ("/word/document.xml", None),
            ],
        );
    }

    /// Declarations written as start/end tag pairs are handled like self-closing ones.
    #[test]
    fn non_self_closing_elements_are_parsed() {
        let fixture = br#"<?xml version="1.0"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="png" ContentType="image/png"></Default>
<Override PartName="/word/document.xml" ContentType="application/vnd.docx"></Override>
</Types>"#;
        let table = parsed(fixture);
        assert_lookups(
            &table,
            &[
                ("word/media/image1.png", Some("image/png")),
                ("word/document.xml", Some("application/vnd.docx")),
            ],
        );
    }

    /// Input that ends with elements still open is rejected as a parse error.
    #[test]
    fn malformed_xml_returns_error() {
        let outcome = parse(b"<Types><broken>");
        let is_parse_error = matches!(outcome, Err(docspec_core::Error::Parse { .. }));
        assert!(is_parse_error);
    }

    /// Several defaults and overrides coexist and resolve independently.
    #[test]
    fn multiple_defaults_and_overrides() {
        let fixture = br#"<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.rels"/>
<Default Extension="xml" ContentType="application/xml"/>
<Default Extension="png" ContentType="image/png"/>
<Default Extension="jpeg" ContentType="image/jpeg"/>
<Override PartName="/word/document.xml" ContentType="application/vnd.docx"/>
<Override PartName="/word/styles.xml" ContentType="application/vnd.styles"/>
</Types>"#;
        let table = parsed(fixture);
        assert_lookups(
            &table,
            &[
                ("word/media/image1.png", Some("image/png")),
                ("word/media/photo.jpeg", Some("image/jpeg")),
                ("word/document.xml", Some("application/vnd.docx")),
                ("word/styles.xml", Some("application/vnd.styles")),
                ("word/theme/theme1.xml", Some("application/xml")),
            ],
        );
    }

    /// A path with no dot at all has no extension to resolve.
    #[test]
    fn lookup_no_extension_returns_none() {
        let fixture = br#"<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="xml" ContentType="application/xml"/>
</Types>"#;
        let table = parsed(fixture);
        assert_lookups(&table, &[("noextension", None)]);
    }

    /// Only a real extension on the last segment may match a `Default`.
    #[test]
    fn lookup_extensionless_part_does_not_match_default_with_same_name() {
        let fixture = br#"<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="png" ContentType="image/png"/>
</Types>"#;
        let table = parsed(fixture);
        assert_lookups(
            &table,
            &[
                ("png", None),
                ("media/png", None),
                ("dotted.dir/noext", None),
                ("trailing.", None),
            ],
        );
    }
}