use crate::error::Error;
use crate::summary::Metadata;
use quick_xml::events::Event;
use quick_xml::Reader;
/// Result of parsing the package document: metadata, section part hrefs, and binary assets.
#[derive(Debug, Default)]
pub struct Manifest {
/// Values collected from the `title` element and the recognised `meta` elements.
pub metadata: Metadata,
/// Hrefs of manifest items that start with `Contents/section` and end with `.xml`.
pub section_files: Vec<String>,
/// Entries for manifest items located under `BinData/` or declared with an `image/` media type.
pub assets: crate::assets::AssetCatalog,
}
pub fn parse(bytes: &[u8]) -> Result<Manifest, Error> {
let mut reader = Reader::from_reader(bytes);
reader.config_mut().trim_text(true);
let mut out = Manifest::default();
let mut in_title = false;
let mut current_meta_name: Option<String> = None;
loop {
match reader
.read_event()
.map_err(|e| Error::Container(format!("content.hpf xml: {e}")))?
{
Event::Start(e) => {
let name = e.name();
let local = name.local_name();
let local_bytes = local.as_ref();
match local_bytes {
b"title" => in_title = true,
b"meta" => {
current_meta_name = get_attr(&e, b"name");
}
b"item" => {
if let Some(href) = get_attr(&e, b"href") {
if href.starts_with("Contents/section") && href.ends_with(".xml") {
out.section_files.push(href);
}
}
}
_ => {}
}
}
Event::Empty(e) => {
let name = e.name();
let local = name.local_name();
let local_bytes = local.as_ref();
match local_bytes {
b"item" => {
if let Some(href) = get_attr(&e, b"href") {
if href.starts_with("Contents/section") && href.ends_with(".xml") {
out.section_files.push(href.clone());
}
let media_type = get_attr(&e, b"media-type").unwrap_or_default();
if href.starts_with("BinData/") || media_type.starts_with("image/") {
let id = parse_bin_id(&href)
.unwrap_or((out.assets.entries.len() + 1) as u16);
let format =
href.rsplit('.').next().unwrap_or("").to_ascii_lowercase();
out.assets.entries.push(crate::assets::BinDataEntry {
id,
name: href,
kind: "EMBEDDING".into(),
format,
size_bytes: None,
});
}
}
}
b"meta" => {
current_meta_name = None;
}
b"title" => {
}
_ => {}
}
}
Event::Text(t) => {
let s = t
.unescape()
.map_err(|e| Error::Container(format!("content.hpf text: {e}")))?
.into_owned();
if s.is_empty() {
continue;
}
if in_title {
out.metadata.title = Some(s);
} else if let Some(meta_name) = ¤t_meta_name {
match meta_name.as_str() {
"creator" => out.metadata.author = Some(s),
"lastsaveby" => out.metadata.last_author = Some(s),
"CreatedDate" => out.metadata.created_at = Some(s),
"ModifiedDate" => out.metadata.modified_at = Some(s),
"subject" => out.metadata.subject = Some(s),
"keyword" | "keywords" => out.metadata.keywords = Some(s),
"description" => out.metadata.comments = Some(s),
_ => {}
}
}
}
Event::End(e) => {
let name = e.name();
let local = name.local_name();
match local.as_ref() {
b"title" => in_title = false,
b"meta" => current_meta_name = None,
_ => {}
}
}
Event::Eof => break,
_ => {}
}
}
out.section_files.sort();
Ok(out)
}
/// Looks up an attribute on `e` and returns its unescaped value.
///
/// An attribute matches when either its full (possibly prefixed) name or its local name
/// equals `key`. Attributes that fail to parse are skipped, and a matching attribute whose
/// value cannot be unescaped is passed over in favour of a later match. Returns `None` when
/// nothing usable is found.
fn get_attr(e: &quick_xml::events::BytesStart, key: &[u8]) -> Option<String> {
    e.attributes().flatten().find_map(|attr| {
        let matches_key =
            attr.key.as_ref() == key || attr.key.local_name().as_ref() == key;
        if !matches_key {
            return None;
        }
        attr.unescape_value().ok().map(|value| value.into_owned())
    })
}
/// Extracts a numeric id from the file name of `href`.
///
/// Only the file stem (the last path component without its extension) is inspected, so
/// digits in a directory name or in the extension (`.jp2`, `.mp4`, ...) are never mistaken
/// for an id. The first run of ASCII digits in the stem is parsed as `u16`.
///
/// Returns `None` when the stem has no digits or the number does not fit in `u16`.
fn parse_bin_id(href: &str) -> Option<u16> {
    let file_name = href.rsplit('/').next().unwrap_or(href);
    let stem = file_name
        .rsplit_once('.')
        .map_or(file_name, |(stem, _ext)| stem);

    // Slice out the first contiguous digit run without allocating.
    let start = stem.find(|c: char| c.is_ascii_digit())?;
    let tail = &stem[start..];
    let end = tail
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(tail.len());
    tail[..end].parse::<u16>().ok()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Self-closing `item`s under `BinData/` are turned into asset entries, in document order.
    #[test]
    fn extracts_bindata_items() {
        let package = r#"<?xml version="1.0" encoding="UTF-8"?>
<opf:package xmlns:opf="http://www.idpf.org/2007/opf/">
<opf:manifest>
<opf:item id="header" href="Contents/header.xml" media-type="application/xml"/>
<opf:item id="section0" href="Contents/section0.xml" media-type="application/xml"/>
<opf:item id="img1" href="BinData/image1.png" media-type="image/png"/>
<opf:item id="img2" href="BinData/image2.jpg" media-type="image/jpeg"/>
</opf:manifest>
</opf:package>"#;
        let manifest = parse(package.as_bytes()).expect("parse");
        let entries = &manifest.assets.entries;
        assert_eq!(entries.len(), 2);

        let png = &entries[0];
        assert_eq!(png.id, 1);
        assert_eq!(png.name, "BinData/image1.png");
        assert_eq!(png.format, "png");
        assert_eq!(png.kind, "EMBEDDING");

        let jpg = &entries[1];
        assert_eq!(jpg.id, 2);
        assert_eq!(jpg.format, "jpg");
    }

    /// The id is the first digit run of the name; names without digits give `None`.
    #[test]
    fn parse_bin_id_variants() {
        let cases = [
            ("BinData/image1.png", Some(1)),
            ("BinData/BIN0042.jpg", Some(42)),
            ("BinData/logo.png", None),
        ];
        for (href, expected) in cases.iter() {
            assert_eq!(parse_bin_id(href), *expected);
        }
    }
}