use crate::Result;
use crate::types::ProcessingWarning;
use roxmltree;
use std::collections::{BTreeMap, BTreeSet};
use super::parsing::resolve_path;
#[derive(Debug, Default, Clone)]
pub(super) struct OepbMetadata {
pub(super) title: Option<String>,
pub(super) creator: Option<String>,
pub(super) date: Option<String>,
pub(super) language: Option<String>,
pub(super) identifier: Option<String>,
pub(super) publisher: Option<String>,
pub(super) subject: Option<String>,
pub(super) description: Option<String>,
pub(super) rights: Option<String>,
pub(super) coverage: Option<String>,
pub(super) format: Option<String>,
pub(super) relation: Option<String>,
pub(super) source: Option<String>,
pub(super) dc_type: Option<String>,
pub(super) cover_image_href: Option<String>,
}
#[derive(Debug, Clone)]
pub(super) struct EpubPackageDocument {
pub(super) metadata: OepbMetadata,
pub(super) manifest: BTreeMap<String, ManifestItem>,
pub(super) spine_items: Vec<EpubSpineItem>,
guide_toc_paths: BTreeSet<String>,
}
#[allow(dead_code)]
impl EpubPackageDocument {
pub(super) fn is_guide_toc_candidate_path(&self, path: &str) -> bool {
self.guide_toc_paths.contains(path)
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct EpubSpineItem {
pub(super) idref: String,
}
#[derive(Debug, Clone)]
pub(super) struct ManifestItem {
pub(super) raw_href: String,
pub(super) path: Option<String>,
path_resolution_error: Option<String>,
#[allow(dead_code)]
pub(super) media_type: Option<String>,
#[allow(dead_code)]
pub(super) fallback: Option<String>,
pub(super) properties: Option<String>,
}
#[allow(dead_code)]
impl ManifestItem {
pub(super) fn is_renderable_body_document(&self) -> bool {
matches!(
self.media_type.as_deref(),
Some("application/xhtml+xml") | Some("application/x-dtbook+xml")
) || self.media_type.is_none() && has_renderable_extension(&self.raw_href)
}
pub(super) fn is_nav(&self) -> bool {
self.properties
.as_deref()
.is_some_and(|p| p.split_ascii_whitespace().any(|v| v.eq_ignore_ascii_case("nav")))
}
pub(super) fn resolved_path(&self) -> std::result::Result<&str, String> {
self.path.as_deref().ok_or_else(|| {
self.path_resolution_error
.clone()
.unwrap_or_else(|| format!("unable to resolve manifest href '{}'", self.raw_href))
})
}
}
#[allow(dead_code)]
fn has_renderable_extension(href: &str) -> bool {
let href = href
.split_once('#')
.map(|(path, _)| path)
.unwrap_or(href)
.rsplit('/')
.next()
.unwrap_or(href);
href.rsplit_once('.')
.map(|(_, ext)| {
matches!(
ext.to_ascii_lowercase().as_str(),
"xhtml" | "html" | "htm" | "xml" | "dtbook"
)
})
.unwrap_or(false)
}
pub(super) fn parse_opf(xml: &str, opf_dir: &str) -> Result<(EpubPackageDocument, Vec<ProcessingWarning>)> {
match roxmltree::Document::parse(xml) {
Ok(doc) => {
let root = doc.root();
let mut warnings = Vec::new();
let mut package = EpubPackageDocument {
metadata: OepbMetadata::default(),
manifest: BTreeMap::new(),
spine_items: Vec::new(),
guide_toc_paths: BTreeSet::new(),
};
let mut manifest: BTreeMap<String, ManifestItem> = BTreeMap::new();
for node in root.descendants() {
match node.tag_name().name() {
"title" => {
if let Some(text) = node.text() {
package.metadata.title = Some(text.trim().to_string());
}
}
"creator" => {
if let Some(text) = node.text() {
package.metadata.creator = Some(text.trim().to_string());
}
}
"date" => {
if let Some(text) = node.text() {
package.metadata.date = Some(text.trim().to_string());
}
}
"language" => {
if let Some(text) = node.text() {
package.metadata.language = Some(text.trim().to_string());
}
}
"identifier" => {
if let Some(text) = node.text() {
package.metadata.identifier = Some(text.trim().to_string());
}
}
"publisher" => {
if let Some(text) = node.text() {
package.metadata.publisher = Some(text.trim().to_string());
}
}
"subject" => {
if let Some(text) = node.text() {
package.metadata.subject = Some(text.trim().to_string());
}
}
"description" => {
if let Some(text) = node.text() {
package.metadata.description = Some(text.trim().to_string());
}
}
"rights" => {
if let Some(text) = node.text() {
package.metadata.rights = Some(text.trim().to_string());
}
}
"coverage" => {
if let Some(text) = node.text() {
package.metadata.coverage = Some(text.trim().to_string());
}
}
"format" => {
if let Some(text) = node.text() {
package.metadata.format = Some(text.trim().to_string());
}
}
"relation" => {
if let Some(text) = node.text() {
package.metadata.relation = Some(text.trim().to_string());
}
}
"source" => {
if let Some(text) = node.text() {
package.metadata.source = Some(text.trim().to_string());
}
}
"type" => {
if let Some(text) = node.text() {
package.metadata.dc_type = Some(text.trim().to_string());
}
}
"item" => {
if let Some(id) = node.attribute("id")
&& let Some(href) = node.attribute("href")
{
let (path, path_resolution_error) = match resolve_path(opf_dir, href) {
Ok(resolved_href) => (Some(resolved_href.path), None),
Err(err) => (None, Some(err.to_string())),
};
manifest.insert(
id.to_string(),
ManifestItem {
raw_href: href.to_string(),
path,
path_resolution_error,
media_type: node.attribute("media-type").map(ToString::to_string),
fallback: node.attribute("fallback").map(ToString::to_string),
properties: node.attribute("properties").map(ToString::to_string),
},
);
}
}
"reference" => {
if node
.attribute("type")
.is_some_and(|kind| kind.eq_ignore_ascii_case("toc"))
&& let Some(href) = node.attribute("href")
{
match resolve_path(opf_dir, href) {
Ok(resolved_href) => {
package.guide_toc_paths.insert(resolved_href.path);
}
Err(e) => {
warnings.push(ProcessingWarning {
source: std::borrow::Cow::Borrowed("epub"),
message: std::borrow::Cow::Owned(format!(
"Skipping malformed guide reference '{}': {}",
href, e
)),
});
}
}
}
}
_ => {}
}
}
let mut cover_item_id = None;
for node in root.descendants() {
if node.tag_name().name() == "meta"
&& node.attribute("name") == Some("cover")
&& let Some(content) = node.attribute("content")
{
cover_item_id = Some(content.to_string());
break;
}
}
if let Some(cover_id) = cover_item_id
&& let Some(href) = manifest.get(&cover_id)
&& let Ok(path) = href.resolved_path()
{
package.metadata.cover_image_href = Some(path.to_string());
}
for node in root.descendants() {
if node.tag_name().name() == "itemref"
&& let Some(idref) = node.attribute("idref")
{
package.spine_items.push(EpubSpineItem {
idref: idref.to_string(),
});
}
}
package.manifest = manifest;
Ok((package, warnings))
}
Err(e) => Err(crate::KreuzbergError::Parsing {
message: format!("Failed to parse OPF file: {}", e),
source: None,
}),
}
}
pub(super) fn build_additional_metadata(epub_metadata: &OepbMetadata) -> BTreeMap<String, serde_json::Value> {
let mut additional_metadata = BTreeMap::new();
if let Some(ref identifier) = epub_metadata.identifier {
additional_metadata.insert("identifier".to_string(), serde_json::json!(identifier.clone()));
}
if let Some(ref publisher) = epub_metadata.publisher {
additional_metadata.insert("publisher".to_string(), serde_json::json!(publisher.clone()));
}
if let Some(ref subject) = epub_metadata.subject {
additional_metadata.insert("subject".to_string(), serde_json::json!(subject.clone()));
}
if let Some(ref description) = epub_metadata.description {
additional_metadata.insert("description".to_string(), serde_json::json!(description.clone()));
}
if let Some(ref rights) = epub_metadata.rights {
additional_metadata.insert("rights".to_string(), serde_json::json!(rights.clone()));
}
additional_metadata
}