lib_epub/
epub.rs

1//! The core module of the EPUB parsing library
2//!
3//! This module provides complete parsing functionality for EPUB ebook files
4//! and is the core component of the entire library. The `EpubDoc` structure
5//! encapsulates all the parsing logic and data access interfaces for EPUB files.
6//!
7//! ## Main references to EPUB specs:
8//! - <https://www.w3.org/TR/epub-33>
9//! - <https://idpf.org/epub/201>
10//!
11//! ## Potential Issues
12//! - The generic parameter `R: Read + Seek` increases complexity, particularly
13//!   in asynchronous environments. The current design is not conducive to multi-threaded
14//!   concurrent access and requires an external synchronization mechanism.
15//! - Some error handling may not be sufficiently nuanced, and certain edge cases
16//!   may not be adequately considered.
17//! - Loading the entire EPUB document at once may result in significant memory consumption,
18//!   especially for large publications.
19//!
20//! ## Future Work
//! - Support more EPUB specification features, such as media overlays and scripting.
22
23use std::{
24    collections::HashMap,
25    fs::{self, File},
26    io::{BufReader, Read, Seek},
27    path::{Path, PathBuf},
28    sync::{
29        Arc, Mutex,
30        atomic::{AtomicUsize, Ordering},
31    },
32};
33
34#[cfg(not(feature = "no-indexmap"))]
35use indexmap::IndexMap;
36use zip::{ZipArchive, result::ZipError};
37
38use crate::{
39    error::EpubError,
40    types::{
41        EncryptionData, EpubVersion, ManifestItem, MetadataItem, MetadataLinkItem,
42        MetadataRefinement, MetadataSheet, NavPoint, SpineItem,
43    },
44    utils::{
45        DecodeBytes, NormalizeWhitespace, XmlElement, XmlReader, adobe_font_dencryption,
46        check_realtive_link_leakage, compression_method_check, get_file_in_zip_archive,
47        idpf_font_dencryption,
48    },
49};
50
/// EPUB document parser, representing a loaded and parsed EPUB publication
///
/// The `EpubDoc` structure is the core of the entire EPUB parsing library.
/// It encapsulates all the parsing logic and data access interfaces for EPUB files.
/// It is responsible for parsing various components of an EPUB, including metadata,
/// manifests, reading order, table of contents navigation, and encryption information,
/// and provides methods for accessing this data.
///
/// Provides a unified data access interface for EPUB files, hiding the underlying
/// file structure and parsing details. Strictly adheres to the EPUB specification
/// in implementing the parsing logic to ensure compatibility with the standard.
///
/// ## Usage
///
/// ```rust
/// use lib_epub::epub::EpubDoc;
///
/// let doc = EpubDoc::new("./test_case/epub-33.epub");
/// assert!(doc.is_ok());
/// ```
///
/// ## Notes
/// - The `EpubDoc` structure is thread-safe **if and only if** the structure is immutable.
/// - The fact that `EpubDoc` is mutable has no practical meaning; modifications
///   to the structure data are not stored in the epub file.
pub struct EpubDoc<R: Read + Seek> {
    /// The underlying ZIP archive of the epub file, shared behind an `Arc<Mutex<_>>`
    pub(crate) archive: Arc<Mutex<ZipArchive<R>>>,

    /// The path to the target epub file
    pub(crate) epub_path: PathBuf,

    /// The path to the OPF file
    pub package_path: PathBuf,

    /// The path to the directory where the OPF file is located
    pub base_path: PathBuf,

    /// The epub version
    pub version: EpubVersion,

    /// The unique identifier of the epub file
    ///
    /// This is the value of the `identifier` metadata item referenced by the
    /// `unique-identifier` attribute of the package, not the attribute value itself.
    pub unique_identifier: String,

    /// Epub metadata extracted from OPF
    pub metadata: Vec<MetadataItem>,

    /// Data in metadata that points to external files
    pub metadata_link: Vec<MetadataLinkItem>,

    /// A list of resources contained inside an epub extracted from OPF
    ///
    /// All resources in the epub file are declared here, and undeclared resources
    /// should not be stored in the epub file and cannot be obtained from it.
    ///
    /// ## Storage Implementation
    ///
    /// By default, this field uses [`IndexMap`] to preserve the original declaration
    /// order from the OPF file, as recommended by the EPUB specification.
    ///
    /// To reduce dependencies, you can enable the `no-indexmap` feature to use
    /// [`HashMap`] instead. Note that this will not preserve the manifest order.
    ///
    /// ## EPUB Specification
    ///
    /// Per the <https://www.w3.org/TR/epub-33/#sec-manifest>:
    ///
    /// > The order of `item` elements within the manifest is significant for
    /// > fallback chain processing and should be preserved when processing
    /// > the publication.
    #[cfg(not(feature = "no-indexmap"))]
    pub manifest: IndexMap<String, ManifestItem>,
    #[cfg(feature = "no-indexmap")]
    pub manifest: HashMap<String, ManifestItem>,

    /// Physical reading order of publications extracted from OPF
    ///
    /// This attribute declares the order in which multiple files
    /// containing published content should be displayed.
    pub spine: Vec<SpineItem>,

    /// The entries of `encryption.xml` extracted from the META-INF directory,
    /// or `None` when no entries were collected
    pub encryption: Option<Vec<EncryptionData>>,

    /// The navigation data of the epub file
    pub catalog: Vec<NavPoint>,

    /// The title of the catalog
    pub catalog_title: String,

    /// The index of the current reading spine
    current_spine_index: AtomicUsize,

    /// Whether the epub file contains encryption information
    has_encryption: bool,
}
149
150impl<R: Read + Seek> EpubDoc<R> {
151    /// Creates a new EPUB document instance from a reader
152    ///
153    /// This function is responsible for the core logic of parsing EPUB files,
154    /// including verifying the file format, parsing container information,
155    /// loading the OPF package document, and extracting metadata, manifest,
156    /// reading order, and other core information.
157    ///
158    /// ## Parameters
159    /// - `reader`: The data source that implements the `Read` and `Seek` traits,
160    ///   usually a file or memory buffer
161    /// - `epub_path`: The path to the EPUB file, used for path resolution and validation
162    ///
163    /// ## Return
164    /// - `Ok(EpubDoc<R>)`: The successfully parsed EPUB document object
165    /// - `Err(EpubError)`: Errors encountered during parsing
166    ///
167    /// ## Notes
168    /// - This function assumes the EPUB file structure is valid
169    // TODO: 增加对必需的 metadata 的检查
170    pub fn from_reader(reader: R, epub_path: PathBuf) -> Result<Self, EpubError> {
171        // Parsing process
172        // 1. Verify that the ZIP compression method conforms to the EPUB specification
173        // 2. Parse `META-INF/container.xml` retrieves the location of the OPF file
174        // 3. Parses the OPF file to obtain package documentation information
175        // 4. Extracts version information
176        // 5. Parses metadata, manifest, and spine
177        // 6. Parses encrypted information and directory navigation
178        // 7. Verifies and extracts the unique identifier
179
180        let mut archive = ZipArchive::new(reader).map_err(EpubError::from)?;
181        let epub_path = fs::canonicalize(epub_path)?;
182
183        compression_method_check(&mut archive)?;
184
185        let container =
186            get_file_in_zip_archive(&mut archive, "META-INF/container.xml")?.decode()?;
187        let package_path = Self::parse_container(container)?;
188        let base_path = package_path
189            .parent()
190            .expect("the parent directory of the opf file must exist")
191            .to_path_buf();
192
193        let opf_file = get_file_in_zip_archive(
194            &mut archive,
195            package_path
196                .to_str()
197                .expect("package_path should be valid UTF-8"),
198        )?
199        .decode()?;
200        let package = XmlReader::parse(&opf_file)?;
201
202        let version = Self::determine_epub_version(&package)?;
203        let has_encryption = archive
204            .by_path(Path::new("META-INF/encryption.xml"))
205            .is_ok();
206
207        let mut doc = Self {
208            archive: Arc::new(Mutex::new(archive)),
209            epub_path,
210            package_path,
211            base_path,
212            version,
213            unique_identifier: String::new(),
214            metadata: vec![],
215            metadata_link: vec![],
216
217            #[cfg(feature = "no-indexmap")]
218            manifest: HashMap::new(),
219            #[cfg(not(feature = "no-indexmap"))]
220            manifest: IndexMap::new(),
221
222            spine: vec![],
223            encryption: None,
224            catalog: vec![],
225            catalog_title: String::new(),
226            current_spine_index: AtomicUsize::new(0),
227            has_encryption,
228        };
229
230        let metadata_element = package.find_elements_by_name("metadata").next().unwrap();
231        let manifest_element = package.find_elements_by_name("manifest").next().unwrap();
232        let spine_element = package.find_elements_by_name("spine").next().unwrap();
233
234        doc.parse_metadata(metadata_element)?;
235        doc.parse_manifest(manifest_element)?;
236        doc.parse_spine(spine_element)?;
237        doc.parse_encryption()?;
238        doc.parse_catalog()?;
239
240        // 断言必有唯一标识符
241        doc.unique_identifier = if let Some(uid) = package.get_attr("unique-identifier") {
242            doc.metadata.iter().find(|item| {
243                item.property == "identifier" && item.id.as_ref().is_some_and(|id| id == &uid)
244            })
245        } else {
246            doc.metadata
247                .iter()
248                .find(|item| item.property == "identifier")
249        }
250        .map(|item| item.value.clone())
251        .ok_or_else(|| EpubError::NonCanonicalFile { tag: "dc:identifier".to_string() })?;
252
253        Ok(doc)
254    }
255
256    /// Parse the EPUB container file (META-INF/container.xml)
257    ///
258    /// This function parses the container information in the EPUB file 、
259    /// to extract the path to the OPF package file. According to the EPUB
260    /// specification, the `container.xml` file must exist in the `META-INF`
261    /// directory and contain at least one `rootfile` element pointing to
262    /// the main OPF file. When multiple `rootfile` elements exist, the first
263    /// element pointing to the OPF file is used as the default.
264    ///
265    /// ## Parameters
266    /// - `data`: The content string of the container.xml
267    ///
268    /// ## Return
269    /// - `Ok(PathBuf)`: The path to the successfully parsed OPF file
270    /// - `Err(EpubError)`: Errors encountered during parsing
271    fn parse_container(data: String) -> Result<PathBuf, EpubError> {
272        let root = XmlReader::parse(&data)?;
273        let rootfile = root
274            .find_elements_by_name("rootfile")
275            .next()
276            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "rootfile".to_string() })?;
277
278        let attr =
279            rootfile
280                .get_attr("full-path")
281                .ok_or_else(|| EpubError::MissingRequiredAttribute {
282                    tag: "rootfile".to_string(),
283                    attribute: "full-path".to_string(),
284                })?;
285
286        Ok(PathBuf::from(attr))
287    }
288
289    /// Parse the EPUB metadata section
290    ///
291    /// This function is responsible for parsing the `<metadata>` elements
292    /// in the OPF file to extract basic information about the publication.
293    /// It handles metadata elements from different namespaces:
294    /// - Elements in the Dublin Core namespace (`http://purl.org/dc/elements/1.1/`)
295    /// - Elements in the OPF namespace (`http://www.idpf.org/2007/opf`)
296    ///
297    /// ## Parameters
298    /// - `metadata_element`: A reference to the `<metadata>` element in the OPF file
299    fn parse_metadata(&mut self, metadata_element: &XmlElement) -> Result<(), EpubError> {
300        const DC_NAMESPACE: &str = "http://purl.org/dc/elements/1.1/";
301        const OPF_NAMESPACE: &str = "http://www.idpf.org/2007/opf";
302
303        let mut metadata = Vec::new();
304        let mut metadata_link = Vec::new();
305        let mut refinements = HashMap::<String, Vec<MetadataRefinement>>::new();
306
307        for element in metadata_element.children() {
308            match &element.namespace {
309                Some(namespace) if namespace == DC_NAMESPACE => {
310                    self.parse_dc_metadata(element, &mut metadata)?
311                }
312
313                Some(namespace) if namespace == OPF_NAMESPACE => self.parse_opf_metadata(
314                    element,
315                    &mut metadata,
316                    &mut metadata_link,
317                    &mut refinements,
318                )?,
319
320                _ => {}
321            };
322        }
323
324        for item in metadata.iter_mut() {
325            if let Some(id) = &item.id {
326                if let Some(refinements) = refinements.remove(id) {
327                    item.refined = refinements;
328                }
329            }
330        }
331
332        self.metadata = metadata;
333        self.metadata_link = metadata_link;
334        Ok(())
335    }
336
337    /// Parse the EPUB manifest section
338    ///
339    /// This function parses the `<manifest>` element in the OPF file, extracting
340    /// information about all resource files in the publication. Each resource contains
341    /// basic information such as id, file path, MIME type, as well as optional
342    /// attributes and fallback resource information.
343    ///
344    /// ## Parameters
345    /// - `manifest_element`: A reference to the `<manifest>` element in the OPF file
346    fn parse_manifest(&mut self, manifest_element: &XmlElement) -> Result<(), EpubError> {
347        let estimated_items = manifest_element.children().count();
348        #[cfg(feature = "no-indexmap")]
349        let mut resources = HashMap::with_capacity(estimated_items);
350        #[cfg(not(feature = "no-indexmap"))]
351        let mut resources = IndexMap::with_capacity(estimated_items);
352
353        for element in manifest_element.children() {
354            let id = element
355                .get_attr("id")
356                .ok_or_else(|| EpubError::MissingRequiredAttribute {
357                    tag: element.tag_name(),
358                    attribute: "id".to_string(),
359                })?
360                .to_string();
361            let path = element
362                .get_attr("href")
363                .ok_or_else(|| EpubError::MissingRequiredAttribute {
364                    tag: element.tag_name(),
365                    attribute: "href".to_string(),
366                })?
367                .to_string();
368            let mime = element
369                .get_attr("media-type")
370                .ok_or_else(|| EpubError::MissingRequiredAttribute {
371                    tag: element.tag_name(),
372                    attribute: "media-type".to_string(),
373                })?
374                .to_string();
375            let properties = element.get_attr("properties");
376            let fallback = element.get_attr("fallback");
377
378            resources.insert(
379                id.clone(),
380                ManifestItem {
381                    id,
382                    path: self.normalize_manifest_path(&path)?,
383                    mime,
384                    properties,
385                    fallback,
386                },
387            );
388        }
389
390        self.manifest = resources;
391        self.validate_fallback_chains();
392        Ok(())
393    }
394
395    /// Parse the EPUB spine section
396    ///
397    /// This function parses the `<spine>` elements in the OPF file to extract
398    /// the reading order information of the publication. The spine defines the
399    /// linear reading order of the publication's content documents, and each
400    /// spine item references resources in the manifest.
401    ///
402    /// ## Parameters
403    /// - `spine_element`: A reference to the `<spine>` element in the OPF file
404    fn parse_spine(&mut self, spine_element: &XmlElement) -> Result<(), EpubError> {
405        let mut spine = Vec::new();
406        for element in spine_element.children() {
407            let idref = element
408                .get_attr("idref")
409                .ok_or_else(|| EpubError::MissingRequiredAttribute {
410                    tag: element.tag_name(),
411                    attribute: "idref".to_string(),
412                })?
413                .to_string();
414            let id = element.get_attr("id");
415            let linear = element
416                .get_attr("linear")
417                .map(|linear| linear == "yes")
418                .unwrap_or(true);
419            let properties = element.get_attr("properties");
420
421            spine.push(SpineItem { idref, id, linear, properties });
422        }
423
424        self.spine = spine;
425        Ok(())
426    }
427
428    /// Parse the EPUB encryption file (META-INF/encryption.xml)
429    ///
430    /// This function is responsible for parsing the `encryption.xml` file
431    /// in the `META-INF` directory to extract information about encrypted
432    /// resources in the publication. According to the EPUB specification,
433    /// the encryption information describes which resources are encrypted
434    /// and the encryption methods used.
435    ///
436    /// TODO: 需要对使用非对称加密数据的加密项进行额外处理，以获取非对称加密密钥
437    fn parse_encryption(&mut self) -> Result<(), EpubError> {
438        if !self.has_encryption() {
439            return Ok(());
440        }
441
442        let mut archive = self.archive.lock()?;
443        let encryption_file =
444            get_file_in_zip_archive(&mut archive, "META-INF/encryption.xml")?.decode()?;
445
446        let root = XmlReader::parse(&encryption_file)?;
447
448        let mut encryption_data = Vec::new();
449        for data in root.children() {
450            if data.name != "EncryptedData" {
451                continue;
452            }
453
454            let method = data
455                .find_elements_by_name("EncryptionMethod")
456                .next()
457                .ok_or_else(|| EpubError::NonCanonicalFile {
458                    tag: "EncryptionMethod".to_string(),
459                })?;
460            let reference = data
461                .find_elements_by_name("CipherReference")
462                .next()
463                .ok_or_else(|| EpubError::NonCanonicalFile {
464                    tag: "CipherReference".to_string(),
465                })?;
466
467            encryption_data.push(EncryptionData {
468                method: method
469                    .get_attr("Algorithm")
470                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
471                        tag: "EncryptionMethod".to_string(),
472                        attribute: "Algorithm".to_string(),
473                    })?
474                    .to_string(),
475                data: reference
476                    .get_attr("URI")
477                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
478                        tag: "CipherReference".to_string(),
479                        attribute: "URI".to_string(),
480                    })?
481                    .to_string(),
482            });
483        }
484
485        if !encryption_data.is_empty() {
486            self.encryption = Some(encryption_data);
487        }
488
489        Ok(())
490    }
491
492    /// Parse the EPUB navigation information
493    ///
494    /// This function is responsible for parsing the navigation information of EPUB
495    /// publications. Different parsing strategies are used depending on the EPUB version:
496    /// - EPUB 2.0: Parses the NCX file to obtain directory information
497    /// - EPUB 3.0: Parses the Navigation Document (NAV) file to obtain directory information
498    fn parse_catalog(&mut self) -> Result<(), EpubError> {
499        const HEAD_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];
500
501        let mut archive = self.archive.lock()?;
502        match self.version {
503            EpubVersion::Version2_0 => {
504                let opf_file =
505                    get_file_in_zip_archive(&mut archive, self.package_path.to_str().unwrap())?
506                        .decode()?;
507                let opf_element = XmlReader::parse(&opf_file)?;
508
509                let toc_id = opf_element
510                    .find_children_by_name("spine")
511                    .next()
512                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?
513                    .get_attr("toc")
514                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
515                        tag: "spine".to_string(),
516                        attribute: "toc".to_string(),
517                    })?
518                    .to_owned();
519                let toc_path = self
520                    .manifest
521                    .get(&toc_id)
522                    .ok_or(EpubError::ResourceIdNotExist { id: toc_id })?
523                    .path
524                    .to_str()
525                    .unwrap();
526
527                let ncx_file = get_file_in_zip_archive(&mut archive, toc_path)?.decode()?;
528                let ncx = XmlReader::parse(&ncx_file)?;
529
530                match ncx.find_elements_by_name("docTitle").next() {
531                    Some(element) => self.catalog_title = element.text(),
532                    None => log::warn!(
533                        "Expecting to get docTitle information from the ncx file, but it's missing."
534                    ),
535                };
536
537                let nav_map = ncx
538                    .find_elements_by_name("navMap")
539                    .next()
540                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "navMap".to_string() })?;
541
542                self.catalog = self.parse_nav_points(nav_map)?;
543
544                Ok(())
545            }
546
547            EpubVersion::Version3_0 => {
548                let nav_path = self
549                    .manifest
550                    .values()
551                    .find(|item| {
552                        if let Some(property) = &item.properties {
553                            return property.contains("nav");
554                        }
555                        false
556                    })
557                    .map(|item| item.path.clone())
558                    .ok_or_else(|| EpubError::NonCanonicalEpub {
559                        expected_file: "Navigation Document".to_string(),
560                    })?;
561
562                let nav_file =
563                    get_file_in_zip_archive(&mut archive, nav_path.to_str().unwrap())?.decode()?;
564
565                let nav_element = XmlReader::parse(&nav_file)?;
566                let nav = nav_element
567                    .find_elements_by_name("nav")
568                    .find(|&element| element.get_attr("epub:type") == Some(String::from("toc")))
569                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "nav".to_string() })?;
570                let nav_title = nav.find_children_by_names(&HEAD_TAGS).next();
571                let nav_list = nav
572                    .find_children_by_name("ol")
573                    .next()
574                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "ol".to_string() })?;
575
576                self.catalog = self.parse_catalog_list(nav_list)?;
577                if let Some(nav_title) = nav_title {
578                    self.catalog_title = nav_title.text();
579                };
580                Ok(())
581            }
582        }
583    }
584
    /// Check if the EPUB file contains `encryption.xml`
    ///
    /// Returns the flag stored on the document when it was loaded; the archive
    /// is not inspected again on each call. According to the EPUB specification,
    /// when resources in a publication are encrypted, the corresponding
    /// encryption information must be declared in the
    /// `META-INF/encryption.xml` file.
    ///
    /// ## Return
    /// - `true` if the publication contains `META-INF/encryption.xml`
    /// - `false` if the publication does not contain it
    ///
    /// ## Notes
    /// - This function only reports the existence of the encryption file;
    ///   it does not verify the validity of the encryption information.
    #[inline]
    pub fn has_encryption(&self) -> bool {
        self.has_encryption
    }
604
605    /// Retrieves a list of metadata items
606    ///
607    /// This function retrieves all matching metadata items from the EPUB metadata
608    /// based on the specified attribute name (key). Metadata items may come from
609    /// the DC (Dublin Core) namespace or the OPF namespace and contain basic
610    /// information about the publication, such as title, author, identifier, etc.
611    ///
612    /// ## Parameters
613    /// - `key`: The name of the metadata attribute to retrieve
614    ///
615    /// ## Return
616    /// - `Some(Vec<MetadataItem>)`: A vector containing all matching metadata items
617    /// - `None`: If no matching metadata items are found
618    pub fn get_metadata(&self, key: &str) -> Option<Vec<MetadataItem>> {
619        let metadatas = self
620            .metadata
621            .iter()
622            .filter(|item| item.property == key)
623            .cloned()
624            .collect::<Vec<MetadataItem>>();
625
626        (!metadatas.is_empty()).then_some(metadatas)
627    }
628
629    /// Retrieves a list of values for specific metadata items
630    ///
631    /// This function retrieves the values of all matching metadata items from
632    /// the EPUB metadata based on the given property name (key).
633    ///
634    /// ## Parameters
635    /// - `key`: The name of the metadata attribute to retrieve
636    ///
637    /// ## Return
638    /// - `Some(Vec<String>)`: A vector containing all matching metadata item values
639    /// - `None`: If no matching metadata items are found
640    pub fn get_metadata_value(&self, key: &str) -> Option<Vec<String>> {
641        let values = self
642            .metadata
643            .iter()
644            .filter(|item| item.property == key)
645            .map(|item| item.value.clone())
646            .collect::<Vec<String>>();
647
648        (!values.is_empty()).then_some(values)
649    }
650
651    /// Retrieves the title of the publication
652    ///
653    /// This function retrieves all title information from the EPUB metadata.
654    /// According to the EPUB specification, a publication can have multiple titles,
655    /// which are returned in the order they appear in the metadata.
656    ///
657    /// ## Return
658    /// - `Result<Vec<String>, EpubError>`: A vector containing all title information
659    /// - `EpubError`: If and only if the OPF file does not contain `<dc:title>`
660    ///
661    /// ## Notes
662    /// - The EPUB specification requires each publication to have at least one title.
663    #[inline]
664    pub fn get_title(&self) -> Vec<String> {
665        self.get_metadata_value("title")
666            .expect("missing required 'title' metadata which is required by the EPUB specification")
667    }
668
669    /// Retrieves the language used in the publication
670    ///
671    /// This function retrieves the language information of a publication from the EPUB
672    /// metadata. According to the EPUB specification, language information identifies
673    /// the primary language of the publication and can have multiple language identifiers.
674    ///
675    /// ## Return
676    /// - `Ok(Vec<String>)`: A vector containing all language identifiers
677    /// - `Err(EpubError)`: If and only if the OPF file does not contain `<dc:language>`
678    ///
679    /// ## Notes
680    /// - The EPUB specification requires that each publication specify at least one primary language.
681    /// - Language identifiers should conform to RFC 3066 or later standards.
682    #[inline]
683    pub fn get_language(&self) -> Vec<String> {
684        self.get_metadata_value("language").expect(
685            "missing required 'language' metadata which is required by the EPUB specification",
686        )
687    }
688
689    /// Retrieves the identifier of a publication
690    ///
691    /// This function retrieves the identifier information of a publication from
692    /// the EPUB metadata. According to the EPUB specification, each publication
693    /// must have a identifier, typically an ISBN, UUID, or other unique identifier.
694    ///
695    /// ## Return
696    /// - `Ok(Vec<String>)`: A vector containing all identifier information
697    /// - `Err(EpubError)`: If and only if the OPF file does not contain `<dc:identifier>`
698    ///
699    /// ## Notes
700    /// - The EPUB specification requires each publication to have at least one identifier.
701    /// - In the OPF file, the `unique-identifier` attribute of the `<package>` element
702    ///   should point to a `<dc:identifier>` element used to uniquely identify the publication.
703    ///   This means that `unique-identifier` is not exactly equal to `<dc:identifier>`.
704    #[inline]
705    pub fn get_identifier(&self) -> Vec<String> {
706        self.get_metadata_value("identifier").expect(
707            "missing required 'identifier' metadata which is required by the EPUB specification",
708        )
709    }
710
711    /// Retrieves a unified metadata sheet from the EPUB publication
712    ///
713    /// This function consolidates all metadata from the EPUB into a single `MetadataSheet`
714    /// structure, providing a simplified interface for metadata access. It handles both
715    /// EPUB 2 and EPUB 3 metadata formats, including refinements from EPUB 3.
716    ///
717    /// ## Return
718    /// - `MetadataSheet`: A populated metadata sheet containing all publication metadata
719    ///
720    /// ## Notes
721    /// - Multi-value metadata (title, creator, etc.) are stored in Vec fields in order
722    /// - Date metadata extracts event type from refinements (e.g., "publication", "modification")
723    /// - Identifier metadata uses item IDs as keys in the HashMap
724    pub fn get_metadata_sheet(&self) -> MetadataSheet {
725        let mut sheet = MetadataSheet::new();
726        for item in &self.metadata {
727            let value = item.value.clone();
728
729            match item.property.as_str() {
730                "title" => {
731                    sheet.title.push(value);
732                }
733                "creator" => {
734                    sheet.creator.push(value);
735                }
736                "contributor" => {
737                    sheet.contributor.push(value);
738                }
739                "subject" => {
740                    sheet.subject.push(value);
741                }
742                "language" => {
743                    sheet.language.push(value);
744                }
745                "relation" => {
746                    sheet.relation.push(value);
747                }
748                "date" => {
749                    let event = item
750                        .refined
751                        .iter()
752                        .filter_map(|refine| {
753                            if refine.property.eq("event") {
754                                Some(refine.value.clone())
755                            } else {
756                                None
757                            }
758                        })
759                        .next()
760                        .unwrap_or_default();
761                    sheet.date.insert(value, event);
762                }
763                "identifier" => {
764                    let id = item.id.clone().unwrap_or_default();
765                    sheet.identifier.insert(id, value);
766                }
767                "description" => {
768                    sheet.description = value;
769                }
770                "format" => {
771                    sheet.format = value;
772                }
773                "publisher" => {
774                    sheet.publisher = value;
775                }
776                "rights" => {
777                    sheet.rights = value;
778                }
779                "source" => {
780                    sheet.source = value;
781                }
782                "ccoverage" => {
783                    sheet.coverage = value;
784                }
785                "type" => {
786                    sheet.epub_type = value;
787                }
788                _ => {}
789            };
790        }
791
792        sheet
793    }
794
795    /// Retrieve resource data by resource ID
796    ///
797    /// This function will find the resource with the specified ID in the manifest.
798    /// If the resource is encrypted, it will be automatically decrypted.
799    ///
800    /// ## Parameters
801    /// - `id`: The ID of the resource to retrieve
802    ///
803    /// ## Return
804    /// - `Ok((Vec<u8>, String))`: Successfully retrieved and decrypted resource data and
805    ///   the MIME type
806    /// - `Err(EpubError)`: Errors that occurred during the retrieval process
807    ///
808    /// ## Notes
809    /// - This function will automatically decrypt the resource if it is encrypted.
810    /// - For unsupported encryption methods, the corresponding error will be returned.
811    pub fn get_manifest_item(&self, id: &str) -> Result<(Vec<u8>, String), EpubError> {
812        let resource_item = self
813            .manifest
814            .get(id)
815            .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
816
817        self.get_resource(resource_item)
818    }
819
820    /// Retrieves resource item data by resource path
821    ///
822    /// This function retrieves resources from the manifest based on the input path.
823    /// The input path must be a relative path to the root directory of the EPUB container;
824    /// using an absolute path or a relative path to another location will result in an error.
825    ///
826    /// ## Parameters
827    /// - `path`: The path of the resource to retrieve
828    ///
829    /// ## Return
830    /// - `Ok((Vec<u8>, String))`: Successfully retrieved and decrypted resource data and
831    ///   the MIME type
832    /// - `Err(EpubError)`: Errors that occurred during the retrieval process
833    ///
834    /// ## Notes
835    /// - This function will automatically decrypt the resource if it is encrypted.
836    /// - For unsupported encryption methods, the corresponding error will be returned.
837    /// - Relative paths other than the root directory of the Epub container are not supported.
838    pub fn get_manifest_item_by_path(&self, path: &str) -> Result<(Vec<u8>, String), EpubError> {
839        let manifest = self
840            .manifest
841            .iter()
842            .find(|(_, item)| item.path.to_str().unwrap() == path)
843            .map(|(_, manifest)| manifest)
844            .ok_or_else(|| EpubError::ResourceNotFound { resource: path.to_string() })?;
845
846        self.get_resource(manifest)
847    }
848
849    /// Retrieves supported resource items by resource ID, with fallback mechanism supported
850    ///
851    /// This function attempts to retrieve the resource item with the specified ID and
852    /// checks if its MIME type is in the list of supported formats. If the current resource
853    /// format is not supported, it searches for a supported resource format along the
854    /// fallback chain according to the fallback mechanism defined in the EPUB specification.
855    ///
856    /// ## Parameters
857    /// - `id`: The ID of the resource to retrieve
858    /// - `supported_format`: A vector of supported MIME types
859    ///
860    /// ## Return
861    /// - `Ok((Vec<u8>, String))`: Successfully retrieved and decrypted resource data and
862    ///   the MIME type
863    /// - `Err(EpubError)`: Errors that occurred during the retrieval process
864    pub fn get_manifest_item_with_fallback(
865        &self,
866        id: &str,
867        supported_format: &[&str],
868    ) -> Result<(Vec<u8>, String), EpubError> {
869        let mut current_id = id;
870        let mut fallback_chain = Vec::<&str>::new();
871        'fallback: loop {
872            let manifest_item = self
873                .manifest
874                .get(current_id)
875                .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
876
877            if supported_format.contains(&manifest_item.mime.as_str()) {
878                return self.get_resource(manifest_item);
879            }
880
881            let fallback_id = match &manifest_item.fallback {
882                // The loop ends when no fallback resource exists
883                None => break 'fallback,
884
885                // End the loop when the loop continues to fallback if a fallback resource exists
886                Some(id) if fallback_chain.contains(&id.as_str()) => break 'fallback,
887
888                Some(id) => {
889                    fallback_chain.push(id.as_str());
890
891                    // Since only warnings are issued for fallback resource checks
892                    // during initialization, the issue of fallback resources possibly
893                    // not existing needs to be handled here.
894                    id.as_str()
895                }
896            };
897
898            current_id = fallback_id;
899        }
900
901        Err(EpubError::NoSupportedFileFormat)
902    }
903
904    /// Retrieves the cover of the EPUB document
905    ///
906    /// This function searches for the cover of the EPUB document by examining manifest
907    /// items in the manifest. It looks for manifest items whose ID or attribute contains
908    /// "cover" (case-insensitive) and attempts to retrieve the content of the first match.
909    ///
910    /// ## Return
911    /// - `Some((Vec<u8>, String))`: Successfully retrieved and decrypted cover data and
912    ///   the MIME type
913    /// - `None`: No cover resource was found
914    ///
915    /// ## Notes
916    /// - This function only returns the first successfully retrieved cover resource,
917    ///   even if multiple matches exist
918    /// - The retrieved cover may not be an image resource; users need to pay attention
919    ///   to the resource's MIME type.
920    pub fn get_cover(&self) -> Option<(Vec<u8>, String)> {
921        self.manifest
922            .values()
923            .filter(|manifest| {
924                manifest.id.to_ascii_lowercase().contains("cover")
925                    || manifest
926                        .properties
927                        .as_ref()
928                        .map(|properties| properties.to_ascii_lowercase().contains("cover"))
929                        .unwrap_or(false)
930            })
931            .find_map(|manifest| {
932                self.get_resource(manifest)
933                    .map_err(|err| log::warn!("{err}"))
934                    .ok()
935            })
936    }
937
938    /// Retrieves resource data by manifest item
939    fn get_resource(&self, resource_item: &ManifestItem) -> Result<(Vec<u8>, String), EpubError> {
940        let path = resource_item
941            .path
942            .to_str()
943            .expect("manifest item path should be valid UTF-8");
944
945        let mut archive = self.archive.lock()?;
946        let mut data = match archive.by_name(path) {
947            Ok(mut file) => {
948                let mut entry = Vec::<u8>::new();
949                file.read_to_end(&mut entry)?;
950                Ok(entry)
951            }
952            Err(ZipError::FileNotFound) => {
953                Err(EpubError::ResourceNotFound { resource: path.to_string() })
954            }
955            Err(err) => Err(EpubError::from(err)),
956        }?;
957
958        if let Some(method) = self.is_encryption_file(path) {
959            data = self.auto_dencrypt(&method, &mut data)?;
960        }
961
962        Ok((data, resource_item.mime.clone()))
963    }
964
965    /// Navigate to a specified chapter using the spine index
966    ///
967    /// This function retrieves the content data of the corresponding chapter based
968    /// on the index position in the EPUB spine. The spine defines the linear reading
969    /// order of the publication's content documents, and each spine item references
970    /// resources in the manifest.
971    ///
972    /// ## Parameters
973    /// - `index`: The index position in the spine, starting from 0
974    ///
975    /// ## Return
976    /// - `Some((Vec<u8>, String))`: Successfully retrieved chapter content data and the MIME type
977    /// - `None`: Index out of range or data retrieval error
978    ///
979    /// ## Notes
980    /// - The index must be less than the total number of spine projects.
981    /// - If the resource is encrypted, it will be automatically decrypted before returning.
982    /// - It does not check whether the Spine project follows a linear reading order.
983    pub fn navigate_by_spine_index(&mut self, index: usize) -> Option<(Vec<u8>, String)> {
984        if index >= self.spine.len() {
985            return None;
986        }
987
988        let manifest_id = self.spine[index].idref.as_ref();
989        self.current_spine_index.store(index, Ordering::SeqCst);
990        self.get_manifest_item(manifest_id)
991            .map_err(|err| log::warn!("{err}"))
992            .ok()
993    }
994
995    /// Navigate to the previous linear reading chapter
996    ///
997    /// This function searches backwards in the EPUB spine for the previous linear
998    /// reading chapter and returns the content data of that chapter. It only navigates
999    /// to chapters marked as linear reading.
1000    ///
1001    /// ## Return
1002    /// - `Some((Vec<u8>, String))`: Successfully retrieved previous chapter content data and
1003    ///   the MIME type
1004    /// - `None`: Already in the first chapter, the current chapter is not linear,
1005    ///   or data retrieval failed
1006    pub fn spine_prev(&self) -> Option<(Vec<u8>, String)> {
1007        let current_index = self.current_spine_index.load(Ordering::SeqCst);
1008        if current_index == 0 || !self.spine[current_index].linear {
1009            return None;
1010        }
1011
1012        let prev_index = (0..current_index)
1013            .rev()
1014            .find(|&index| self.spine[index].linear)?;
1015
1016        self.current_spine_index.store(prev_index, Ordering::SeqCst);
1017        let manifest_id = self.spine[prev_index].idref.as_ref();
1018        self.get_manifest_item(manifest_id)
1019            .map_err(|err| log::warn!("{err}"))
1020            .ok()
1021    }
1022
1023    /// Navigate to the next linear reading chapter
1024    ///
1025    /// This function searches forwards in the EPUB spine for the next linear reading
1026    /// chapter and returns the content data of that chapter. It only navigates to
1027    /// chapters marked as linear reading.
1028    ///
1029    /// ## Return
1030    /// - `Some((Vec<u8>, String))`: Successfully retrieved next chapter content data and
1031    ///   the MIME type
1032    /// - `None`: Already in the last chapter, the current chapter is not linear,
1033    ///   or data retrieval failed
1034    pub fn spine_next(&mut self) -> Option<(Vec<u8>, String)> {
1035        let current_index = self.current_spine_index.load(Ordering::SeqCst);
1036        if current_index >= self.spine.len() - 1 || !self.spine[current_index].linear {
1037            return None;
1038        }
1039
1040        let next_index =
1041            (current_index + 1..self.spine.len()).find(|&index| self.spine[index].linear)?;
1042
1043        self.current_spine_index.store(next_index, Ordering::SeqCst);
1044        let manifest_id = self.spine[next_index].idref.as_ref();
1045        self.get_manifest_item(manifest_id)
1046            .map_err(|err| log::warn!("{err}"))
1047            .ok()
1048    }
1049
1050    /// Retrieves the content data of the current chapter
1051    ///
1052    /// This function returns the content data of the chapter at the current
1053    /// index position in the EPUB spine.
1054    ///
1055    /// ## Return
1056    /// - `Some((Vec<u8>, String))`: Successfully retrieved current chapter content data and
1057    ///   the MIME type
1058    /// - `None`: Data retrieval failed
1059    pub fn spine_current(&self) -> Option<(Vec<u8>, String)> {
1060        let manifest_id = self.spine[self.current_spine_index.load(Ordering::SeqCst)]
1061            .idref
1062            .as_ref();
1063        self.get_manifest_item(manifest_id)
1064            .map_err(|err| log::warn!("{err}"))
1065            .ok()
1066    }
1067
1068    /// Determine the EPUB version from the OPF file
1069    ///
1070    /// This function is used to detect the version of an epub file from an OPF file.
1071    /// When the version attribute in the package is abnormal, version information will
1072    /// be identified through some version characteristics of the epub file. An error is
1073    /// returned when neither direct nor indirect methods can identify the version.
1074    ///
1075    /// ## Parameters
1076    /// - `opf_element`: A reference to the OPF file element
1077    fn determine_epub_version(opf_element: &XmlElement) -> Result<EpubVersion, EpubError> {
1078        // Check the explicit version attribute
1079        if let Some(version) = opf_element.get_attr("version") {
1080            match version.as_str() {
1081                "2.0" => return Ok(EpubVersion::Version2_0),
1082                "3.0" => return Ok(EpubVersion::Version3_0),
1083                _ => {}
1084            }
1085        }
1086
1087        let spine_element = opf_element
1088            .find_elements_by_name("spine")
1089            .next()
1090            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?;
1091
1092        // Look for EPUB 2.x specific features
1093        if spine_element.get_attr("toc").is_some() {
1094            return Ok(EpubVersion::Version2_0);
1095        }
1096
1097        let manifest_element = opf_element
1098            .find_elements_by_name("manifest")
1099            .next()
1100            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "manifest".to_string() })?;
1101
1102        // Look for EPUB 3.x specific features
1103        manifest_element
1104            .children()
1105            .find_map(|element| {
1106                if let Some(id) = element.get_attr("id") {
1107                    if id.eq("nav") {
1108                        return Some(EpubVersion::Version3_0);
1109                    }
1110                }
1111
1112                None
1113            })
1114            .ok_or(EpubError::UnrecognizedEpubVersion)
1115    }
1116
1117    /// Parse metadata elements under the Dublin Core namespace
1118    ///
1119    /// This function handles the `<metadata>` Dublin Core element in the OPF file (namespace
1120    /// is "http://purl.org/dc/elements/1.1/"). These elements usually contain the basic
1121    /// information of the publication, such as title, author, publication date, etc.
1122    ///
1123    /// ## Notes
1124    /// - In EPUB 3.0, granular information is handled by separate '<meta>' elements and 'refines' attributes
1125    /// - All text content is normalized by whitespace
1126    #[inline]
1127    fn parse_dc_metadata(
1128        &self,
1129        element: &XmlElement,
1130        metadata: &mut Vec<MetadataItem>,
1131        // refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1132    ) -> Result<(), EpubError> {
1133        let id = element.get_attr("id");
1134        let lang = element.get_attr("lang");
1135        let property = element.name.clone();
1136        let value = element.text().normalize_whitespace();
1137
1138        let refined = match self.version {
1139            // In EPUB 2.0, supplementary metadata (refinements) are represented
1140            // through other attribute data pairs of the tag.
1141            EpubVersion::Version2_0 => element
1142                .attributes
1143                .iter()
1144                .map(|(name, value)| {
1145                    let property = name.to_string();
1146                    let value = value.to_string().normalize_whitespace();
1147
1148                    MetadataRefinement {
1149                        refines: id.clone().unwrap(),
1150                        property,
1151                        value,
1152                        lang: None,
1153                        scheme: None,
1154                    }
1155                })
1156                .collect(),
1157            EpubVersion::Version3_0 => vec![],
1158        };
1159
1160        metadata.push(MetadataItem { id, property, value, lang, refined });
1161
1162        Ok(())
1163    }
1164
1165    /// Parse metadata elements under the OPF namespace
1166    ///
1167    /// This function handles the `<metadata>` OPF element in the OPF file (namespace
1168    /// is "http://www.idpf.org/2007/opf"). These elements include '<meta>' and '<link>',
1169    /// which are used to provide extended metadata and links to external resources for EPUB publications.
1170    ///
1171    /// ## Notes
1172    /// - The function is only responsible for distribution processing, and the
1173    ///   specific parsing logic is implemented in the dedicated function
1174    /// - All parsing results are added directly to the incoming collection and no new collection is returned
1175    #[inline]
1176    fn parse_opf_metadata(
1177        &self,
1178        element: &XmlElement,
1179        metadata: &mut Vec<MetadataItem>,
1180        metadata_link: &mut Vec<MetadataLinkItem>,
1181        refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1182    ) -> Result<(), EpubError> {
1183        match element.name.as_str() {
1184            "meta" => self.parse_meta_element(element, metadata, refinements),
1185            "link" => self.parse_link_element(element, metadata_link),
1186            _ => Ok(()),
1187        }
1188    }
1189
1190    #[inline]
1191    fn parse_meta_element(
1192        &self,
1193        element: &XmlElement,
1194        metadata: &mut Vec<MetadataItem>,
1195        refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1196    ) -> Result<(), EpubError> {
1197        match self.version {
1198            EpubVersion::Version2_0 => {
1199                let property = element
1200                    .get_attr("name")
1201                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: element.tag_name() })?;
1202                let value = element
1203                    .get_attr("content")
1204                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
1205                        tag: element.tag_name(),
1206                        attribute: "content".to_string(),
1207                    })?
1208                    .normalize_whitespace();
1209
1210                metadata.push(MetadataItem {
1211                    id: None,
1212                    property,
1213                    value,
1214                    lang: None,
1215                    refined: vec![],
1216                });
1217            }
1218
1219            EpubVersion::Version3_0 => {
1220                let property = element.get_attr("property").ok_or_else(|| {
1221                    EpubError::MissingRequiredAttribute {
1222                        tag: element.tag_name(),
1223                        attribute: "property".to_string(),
1224                    }
1225                })?;
1226                let value = element.text().normalize_whitespace();
1227                let lang = element.get_attr("lang");
1228
1229                if let Some(refines) = element.get_attr("refines") {
1230                    let id = refines.strip_prefix("#").unwrap_or(&refines).to_string();
1231                    let scheme = element.get_attr("scheme");
1232                    let refinement = MetadataRefinement {
1233                        refines: id.clone(),
1234                        property,
1235                        value,
1236                        lang,
1237                        scheme,
1238                    };
1239
1240                    if let Some(refinements) = refinements.get_mut(&id) {
1241                        refinements.push(refinement);
1242                    } else {
1243                        refinements.insert(id, vec![refinement]);
1244                    }
1245                } else {
1246                    let id = element.get_attr("id");
1247                    let item = MetadataItem {
1248                        id,
1249                        property,
1250                        value,
1251                        lang,
1252                        refined: vec![],
1253                    };
1254
1255                    metadata.push(item);
1256                };
1257            }
1258        }
1259        Ok(())
1260    }
1261
1262    #[inline]
1263    fn parse_link_element(
1264        &self,
1265        element: &XmlElement,
1266        metadata_link: &mut Vec<MetadataLinkItem>,
1267    ) -> Result<(), EpubError> {
1268        let href = element
1269            .get_attr("href")
1270            .ok_or_else(|| EpubError::MissingRequiredAttribute {
1271                tag: element.tag_name(),
1272                attribute: "href".to_string(),
1273            })?;
1274        let rel = element
1275            .get_attr("rel")
1276            .ok_or_else(|| EpubError::MissingRequiredAttribute {
1277                tag: element.tag_name(),
1278                attribute: "rel".to_string(),
1279            })?;
1280        let hreflang = element.get_attr("hreflang");
1281        let id = element.get_attr("id");
1282        let mime = element.get_attr("media-type");
1283        let properties = element.get_attr("properties");
1284
1285        metadata_link.push(MetadataLinkItem {
1286            href,
1287            rel,
1288            hreflang,
1289            id,
1290            mime,
1291            properties,
1292            refines: None,
1293        });
1294        Ok(())
1295    }
1296
1297    /// Recursively parse NCX navigation points from navMap or nested navPoint elements
1298    ///
1299    /// This function parses the hierarchical navigation structure defined in NCX files
1300    /// for EPUB 2.x documents. It handles nested navPoint elements to build a complete
1301    /// tree representation of the publication's table of contents.
1302    fn parse_nav_points(&self, parent_element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1303        let mut nav_points = Vec::new();
1304        for nav_point in parent_element.find_children_by_name("navPoint") {
1305            let label = match nav_point.find_children_by_name("navLabel").next() {
1306                Some(element) => element.text(),
1307                None => String::new(),
1308            };
1309
1310            let content = nav_point
1311                .find_children_by_name("content")
1312                .next()
1313                .map(|element| PathBuf::from(element.text()));
1314
1315            let play_order = nav_point
1316                .get_attr("playOrder")
1317                .and_then(|order| order.parse::<usize>().ok());
1318
1319            let children = self.parse_nav_points(nav_point)?;
1320
1321            nav_points.push(NavPoint { label, content, play_order, children });
1322        }
1323
1324        nav_points.sort();
1325        Ok(nav_points)
1326    }
1327
1328    /// Recursively parses directory list structures
1329    ///
1330    /// This function recursively parses HTML navigation list structures,
1331    /// converting `<ol>` and `<li>` elements into NavPoint structures.
1332    /// Multi-level nested directory structures are supported.
1333    fn parse_catalog_list(&self, element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1334        let mut catalog = Vec::new();
1335        for item in element.children() {
1336            if item.tag_name() != "li" {
1337                return Err(EpubError::NonCanonicalFile { tag: "li".to_string() });
1338            }
1339
1340            let title_element = item
1341                .find_children_by_names(&["span", "a"])
1342                .next()
1343                .ok_or_else(|| EpubError::NonCanonicalFile { tag: "span/a".to_string() })?;
1344            let content_href = title_element.get_attr("href").map(PathBuf::from);
1345            let sub_list = if let Some(list) = item.find_children_by_name("ol").next() {
1346                self.parse_catalog_list(list)?
1347            } else {
1348                vec![]
1349            };
1350
1351            catalog.push(NavPoint {
1352                label: title_element.text(),
1353                content: content_href,
1354                children: sub_list,
1355                play_order: None,
1356            });
1357        }
1358
1359        Ok(catalog)
1360    }
1361
1362    /// Converts relative paths in the manifest to normalized paths
1363    /// relative to the EPUB root directory
1364    ///
1365    /// This function processes the href attribute of resources in the EPUB
1366    /// manifest and converts it to a normalized path representation.
1367    /// It handles three types of paths:
1368    /// - Relative paths starting with `../` (checks if they exceed the EPUB package scope)
1369    /// - Absolute paths starting with `/` (relative to the EPUB root directory)
1370    /// - Other relative paths (relative to the directory containing the OPF file)
1371    ///
1372    /// ## Parameters
1373    /// - `path`: The href attribute value of the resource in the manifest
1374    ///
1375    /// ## Return
1376    /// - `Ok(PathBuf)`: The parsed normalized path
1377    /// - `Err(EpubError)`: Relative link leakage
1378    #[inline]
1379    fn normalize_manifest_path(&self, path: &str) -> Result<PathBuf, EpubError> {
1380        let mut path = if path.starts_with("../") {
1381            let mut current_dir = self.epub_path.join(&self.package_path);
1382            current_dir.pop();
1383
1384            check_realtive_link_leakage(self.epub_path.clone(), current_dir, path)
1385                .map(PathBuf::from)
1386                .ok_or_else(|| EpubError::RelativeLinkLeakage { path: path.to_string() })?
1387        } else if let Some(path) = path.strip_prefix("/") {
1388            PathBuf::from(path.to_string())
1389        } else {
1390            self.base_path.join(path)
1391        };
1392
1393        #[cfg(windows)]
1394        {
1395            path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
1396        }
1397
1398        Ok(path)
1399    }
1400
1401    /// Verify the fallback chain of all manifest items
1402    ///
1403    /// This function iterates through all manifest items with the fallback
1404    /// attribute and verifies the validity of their fallback chains, including checking:
1405    /// - Whether circular references exist
1406    /// - Whether the fallback resource exists in the manifest
1407    ///
1408    /// ## Notes
1409    /// If an invalid fallback chain is found, a warning log will be logged
1410    /// but the processing flow will not be interrupted.
1411    // TODO: consider using BFS to validate fallback chains, to provide efficient
1412    fn validate_fallback_chains(&self) {
1413        for (id, item) in &self.manifest {
1414            if item.fallback.is_none() {
1415                continue;
1416            }
1417
1418            let mut fallback_chain = Vec::new();
1419            if let Err(msg) = self.validate_fallback_chain(id, &mut fallback_chain) {
1420                log::warn!("Invalid fallback chain for item {}: {}", id, msg);
1421            }
1422        }
1423    }
1424
1425    /// Recursively verify the validity of a single fallback chain
1426    ///
1427    /// This function recursively traces the fallback chain to check for the following issues:
1428    /// - Circular reference
1429    /// - The referenced fallback resource does not exist
1430    ///
1431    /// ## Parameters
1432    /// - `manifest_id`: The id of the manifest item currently being verified
1433    /// - `fallback_chain`: The visited fallback chain paths used to detect circular references
1434    ///
1435    /// ## Return
1436    /// - `Ok(())`: The fallback chain is valid
1437    /// - `Err(String)`: A string containing error information
1438    fn validate_fallback_chain(
1439        &self,
1440        manifest_id: &str,
1441        fallback_chain: &mut Vec<String>,
1442    ) -> Result<(), String> {
1443        if fallback_chain.contains(&manifest_id.to_string()) {
1444            fallback_chain.push(manifest_id.to_string());
1445
1446            return Err(format!(
1447                "Circular reference detected in fallback chain for {}",
1448                fallback_chain.join("->")
1449            ));
1450        }
1451
1452        // Get the current item; its existence can be ensured based on the calling context.
1453        let item = self.manifest.get(manifest_id).unwrap();
1454
1455        if let Some(fallback_id) = &item.fallback {
1456            if !self.manifest.contains_key(fallback_id) {
1457                return Err(format!(
1458                    "Fallback resource {} does not exist in manifest",
1459                    fallback_id
1460                ));
1461            }
1462
1463            fallback_chain.push(manifest_id.to_string());
1464            self.validate_fallback_chain(fallback_id, fallback_chain)
1465        } else {
1466            // The end of the fallback chain
1467            Ok(())
1468        }
1469    }
1470
1471    /// Checks if a resource at the specified path is an encrypted file
1472    ///
1473    /// This function queries whether a specific resource path is marked as an encrypted
1474    /// file in the EPUB encryption information. It checks the encrypted data stored in
1475    /// `self.encryption`, looking for an entry that matches the given path.
1476    ///
1477    /// ## Parameters
1478    /// - `path`: The path of the resource to check
1479    ///
1480    /// ## Return
1481    /// - `Some(String)`: The encryption method used for the resource
1482    /// - `None`: The resource is not encrypted
1483    fn is_encryption_file(&self, path: &str) -> Option<String> {
1484        self.encryption.as_ref().and_then(|encryptions| {
1485            encryptions
1486                .iter()
1487                .find(|encryption| encryption.data == path)
1488                .map(|encryption| encryption.method.clone())
1489        })
1490    }
1491
1492    /// Automatically decrypts encrypted resource data
1493    ///
1494    /// Automatically decrypts data based on the provided encryption method.
1495    /// This function supports various encryption methods defined by the EPUB
1496    /// specification, including font obfuscation and the XML encryption standard.
1497    ///
1498    /// ## Parameters
1499    /// - `method`: The encryption method used for the resource
1500    /// - `data`: The encrypted resource data
1501    ///
1502    /// ## Return
1503    /// - `Ok(Vec<u8>)`: The decrypted resource data
1504    /// - `Err(EpubError)`: Unsupported encryption method
1505    ///
1506    /// ## Supported Encryption Methods
1507    /// - IDPF font obfuscation: `http://www.idpf.org/2008/embedding`
1508    /// - Adobe font obfuscation: `http://ns.adobe.com/pdf/enc#RC`
1509    #[inline]
1510    fn auto_dencrypt(&self, method: &str, data: &mut [u8]) -> Result<Vec<u8>, EpubError> {
1511        match method {
1512            "http://www.idpf.org/2008/embedding" => {
1513                Ok(idpf_font_dencryption(data, &self.unique_identifier))
1514            }
1515            "http://ns.adobe.com/pdf/enc#RC" => {
1516                Ok(adobe_font_dencryption(data, &self.unique_identifier))
1517            }
1518            _ => Err(EpubError::UnsupportedEncryptedMethod { method: method.to_string() }),
1519        }
1520    }
1521}
1522
1523impl EpubDoc<BufReader<File>> {
1524    /// Creates a new EPUB document instance
1525    ///
1526    /// This function is a convenience constructor for `EpubDoc`,
1527    /// used to create an EPUB parser instance directly from a file path.
1528    ///
1529    /// ## Parameters
1530    /// - `path`: The path to the EPUB file
1531    ///
1532    /// ## Return
1533    /// - `Ok(EpubDoc)`: The created EPUB document instance
1534    /// - `Err(EpubError)`: An error occurred during initialization
1535    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
1536        let file = File::open(&path).map_err(EpubError::from)?;
1537        let path = fs::canonicalize(path)?;
1538
1539        Self::from_reader(BufReader::new(file), path)
1540    }
1541
1542    /// Validates whether a file is a valid EPUB document
1543    ///
1544    /// This function attempts to open and parse the given file as an EPUB document.
1545    /// It performs basic validation to determine if the file conforms to the EPUB specification.
1546    ///
1547    /// ## Parameters
1548    /// - `path`: The path to the file to validate
1549    ///
1550    /// ## Returns
1551    /// - `Ok(true)`: The file is a valid EPUB document
1552    /// - `Ok(false)`: The file exists but is not a valid EPUB (e.g., missing required files,
1553    ///   invalid XML structure, unrecognized version)
1554    /// - `Err(EpubError)`: A critical error occurred (e.g., IO error, ZIP archive error,
1555    ///   encoding error, mutex poison)
1556    pub fn is_valid_epub<P: AsRef<Path>>(path: P) -> Result<bool, EpubError> {
1557        let result = EpubDoc::new(path);
1558
1559        match result {
1560            Ok(_) => Ok(true),
1561            Err(err) if Self::is_outside_error(&err) => Err(err),
1562            Err(_) => Ok(false),
1563        }
1564    }
1565
1566    /// Determines if an error is a "critical" external error that should be propagated
1567    ///
1568    /// ## Error Classification
1569    /// Outside errors (returned as `Err`):
1570    /// - ArchiveError: ZIP archive corruption or read errors
1571    /// - IOError: File system or read errors
1572    /// - MutexError: Thread synchronization errors
1573    /// - Utf8DecodeError: UTF-8 encoding errors
1574    /// - Utf16DecodeError: UTF-16 encoding errors
1575    /// - QuickXmlError: XML parser errors
1576    ///
1577    /// Irrelevant errors (returned as `Ok(false)`):
1578    /// - these errors could not have occurred in this situation.
1579    /// - EpubBuilderError
1580    /// - WalkDirError
1581    ///
1582    /// Content errors (returned as `Ok(false)`):
1583    /// - All other EpubError variants
1584    fn is_outside_error(err: &EpubError) -> bool {
1585        matches!(
1586            err,
1587            EpubError::ArchiveError { .. }
1588                | EpubError::IOError { .. }
1589                | EpubError::MutexError
1590                | EpubError::Utf8DecodeError { .. }
1591                | EpubError::Utf16DecodeError { .. }
1592                | EpubError::QuickXmlError { .. }
1593        )
1594    }
1595}
1596
1597#[cfg(test)]
1598mod tests {
1599    use std::{
1600        fs::File,
1601        io::BufReader,
1602        path::{Path, PathBuf},
1603    };
1604
1605    use crate::{epub::EpubDoc, error::EpubError, utils::XmlReader};
1606
1607    /// Section 3.3 package documents
1608    mod package_documents_tests {
1609        use std::{path::Path, sync::atomic::Ordering};
1610
1611        use crate::epub::{EpubDoc, EpubVersion};
1612
1613        /// ID: pkg-collections-unknown
1614        ///
1615        /// The package document contains a collection with an unknown role. The reading system must open the EPUB successfully.
1616        #[test]
1617        fn test_pkg_collections_unknown() {
1618            let epub_file = Path::new("./test_case/pkg-collections-unknown.epub");
1619            let doc = EpubDoc::new(epub_file);
1620            assert!(doc.is_ok());
1621        }
1622
1623        /// ID: pkg-creator-order
1624        ///
1625        /// Several creators are listed in the package document. The reading system must not display them out of order (but it may display only the first).
1626        #[test]
1627        fn test_pkg_creator_order() {
1628            let epub_file = Path::new("./test_case/pkg-creator-order.epub");
1629            let doc = EpubDoc::new(epub_file);
1630            assert!(doc.is_ok());
1631
1632            let doc = doc.unwrap();
1633            let creators = doc.get_metadata_value("creator");
1634            assert!(creators.is_some());
1635
1636            let creators = creators.unwrap();
1637            assert_eq!(creators.len(), 5);
1638            assert_eq!(
1639                creators,
1640                vec![
1641                    "Dave Cramer",
1642                    "Wendy Reid",
1643                    "Dan Lazin",
1644                    "Ivan Herman",
1645                    "Brady Duga",
1646                ]
1647            );
1648        }
1649
1650        /// ID: pkg-manifest-unknown
1651        ///
1652        /// The package document contains a manifest item with unknown properties. The reading system must open the EPUB successfully.
1653        #[test]
1654        fn test_pkg_manifest_order() {
1655            let epub_file = Path::new("./test_case/pkg-manifest-unknown.epub");
1656            let doc = EpubDoc::new(epub_file);
1657            assert!(doc.is_ok());
1658
1659            let doc = doc.unwrap();
1660            assert_eq!(doc.manifest.len(), 2);
1661            assert!(doc.get_manifest_item("nav").is_ok());
1662            assert!(doc.get_manifest_item("content_001").is_ok());
1663            assert!(doc.get_manifest_item("content_002").is_err());
1664        }
1665
1666        /// ID: pkg-meta-unknown
1667        ///
1668        /// The package document contains a meta tag with an unknown property. The reading system must open the EPUB successfully.
1669        #[test]
1670        fn test_pkg_meta_unknown() {
1671            let epub_file = Path::new("./test_case/pkg-meta-unknown.epub");
1672            let doc = EpubDoc::new(epub_file);
1673            assert!(doc.is_ok());
1674
1675            let doc = doc.unwrap();
1676            let value = doc.get_metadata_value("dcterms:isReferencedBy");
1677            assert!(value.is_some());
1678            let value = value.unwrap();
1679            assert_eq!(value.len(), 1);
1680            assert_eq!(
1681                value,
1682                vec!["https://www.w3.org/TR/epub-rs/#confreq-rs-pkg-meta-unknown"]
1683            );
1684
1685            let value = doc.get_metadata_value("dcterms:modified");
1686            assert!(value.is_some());
1687            let value = value.unwrap();
1688            assert_eq!(value.len(), 1);
1689            assert_eq!(value, vec!["2021-01-11T00:00:00Z"]);
1690
1691            let value = doc.get_metadata_value("dcterms:title");
1692            assert!(value.is_none());
1693        }
1694
1695        /// ID: pkg-meta-whitespace
1696        ///
1697        /// The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases.
1698        #[test]
1699        fn test_pkg_meta_white_space() {
1700            let epub_file = Path::new("./test_case/pkg-meta-whitespace.epub");
1701            let doc = EpubDoc::new(epub_file);
1702            assert!(doc.is_ok());
1703
1704            let doc = doc.unwrap();
1705            let value = doc.get_metadata_value("creator");
1706            assert!(value.is_some());
1707            let value = value.unwrap();
1708            assert_eq!(value.len(), 1);
1709            assert_eq!(value, vec!["Dave Cramer"]);
1710
1711            let value = doc.get_metadata_value("description");
1712            assert!(value.is_some());
1713            let value = value.unwrap();
1714            assert_eq!(value.len(), 1);
1715            assert_eq!(
1716                value,
1717                vec![
1718                    "The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases."
1719                ]
1720            );
1721        }
1722
1723        /// ID: pkg-spine-duplicate-item-hyperlink
1724        ///
1725        /// The spine contains several references to the same content document. The reading system must move to the position of the first duplicate in the reading order when following a hyperlink.
1726        #[test]
1727        fn test_pkg_spine_duplicate_item_hyperlink() {
1728            let epub_file = Path::new("./test_case/pkg-spine-duplicate-item-hyperlink.epub");
1729            let doc = EpubDoc::new(epub_file);
1730            assert!(doc.is_ok());
1731
1732            let mut doc = doc.unwrap();
1733            assert_eq!(doc.spine.len(), 4);
1734            assert_eq!(
1735                doc.navigate_by_spine_index(0).unwrap(),
1736                doc.get_manifest_item("content_001").unwrap()
1737            );
1738            assert_eq!(
1739                doc.navigate_by_spine_index(1).unwrap(),
1740                doc.get_manifest_item("content_002").unwrap()
1741            );
1742            assert_eq!(
1743                doc.navigate_by_spine_index(2).unwrap(),
1744                doc.get_manifest_item("content_002").unwrap()
1745            );
1746            assert_eq!(
1747                doc.navigate_by_spine_index(3).unwrap(),
1748                doc.get_manifest_item("content_002").unwrap()
1749            );
1750        }
1751
1752        /// ID: pkg-spine-duplicate-item-rendering
1753        ///
1754        /// The spine contains several references to the same content document. The reading system must not skip the duplicates when rendering the reading order.
1755        #[test]
1756        fn test_pkg_spine_duplicate_item_rendering() {
1757            let epub_file = Path::new("./test_case/pkg-spine-duplicate-item-rendering.epub");
1758            let doc = EpubDoc::new(epub_file);
1759            assert!(doc.is_ok());
1760
1761            let mut doc = doc.unwrap();
1762            assert_eq!(doc.spine.len(), 4);
1763
1764            let result = doc.spine_prev();
1765            assert!(result.is_none());
1766
1767            let result = doc.spine_next();
1768            assert!(result.is_some());
1769
1770            doc.spine_next();
1771            doc.spine_next();
1772            let result = doc.spine_next();
1773            assert!(result.is_none());
1774        }
1775
1776        /// ID: pkg-spine-nonlinear-activation
1777        ///
1778        /// An itemref in the spine is marked as non-linear. Although it (possibly) cannot be accessed through the table of contents, it can be reached from a link in the XHTML content.
1779        #[test]
1780        fn test_pkg_spine_nonlinear_activation() {
1781            let epub_file = Path::new("./test_case/pkg-spine-nonlinear-activation.epub");
1782            let doc = EpubDoc::new(epub_file);
1783            assert!(doc.is_ok());
1784
1785            let mut doc = doc.unwrap();
1786            assert!(doc.spine_prev().is_none());
1787            assert!(doc.spine_next().is_none());
1788
1789            assert!(doc.navigate_by_spine_index(1).is_some());
1790            assert!(doc.spine_prev().is_none());
1791            assert!(doc.spine_next().is_none());
1792        }
1793
1794        /// ID: pkg-spine-order
1795        ///
1796        /// Basic test of whether a reading system can display spine items in the correct order. The test fails if the reading system presents content in the order in which the file names sort, or if it presents files in manifest order rather than spine order.
1797        #[test]
1798        fn test_pkg_spine_order() {
1799            let epub_file = Path::new("./test_case/pkg-spine-order.epub");
1800            let doc = EpubDoc::new(epub_file);
1801            assert!(doc.is_ok());
1802
1803            let doc = doc.unwrap();
1804            assert_eq!(doc.spine.len(), 4);
1805            assert_eq!(
1806                doc.spine
1807                    .iter()
1808                    .map(|item| item.idref.clone())
1809                    .collect::<Vec<String>>(),
1810                vec![
1811                    "d-content_001",
1812                    "c-content_002",
1813                    "b-content_003",
1814                    "a-content_004",
1815                ]
1816            );
1817        }
1818
1819        /// ID: pkg-spine-order-svg
1820        ///
1821        /// Basic test of whether a reading system can display SVG spine items in the correct order.
1822        #[test]
1823        fn test_spine_order_svg() {
1824            let epub_file = Path::new("./test_case/pkg-spine-order-svg.epub");
1825            let doc = EpubDoc::new(epub_file);
1826            assert!(doc.is_ok());
1827
1828            let mut doc = doc.unwrap();
1829            assert_eq!(doc.spine.len(), 4);
1830
1831            loop {
1832                if let Some(spine) = doc.spine_next() {
1833                    let idref = doc.spine[doc.current_spine_index.load(Ordering::Relaxed)]
1834                        .idref
1835                        .clone();
1836                    let resource = doc.get_manifest_item(&idref);
1837                    assert!(resource.is_ok());
1838
1839                    let resource = resource.unwrap();
1840                    assert_eq!(spine, resource);
1841                } else {
1842                    break;
1843                }
1844            }
1845
1846            assert_eq!(doc.current_spine_index.load(Ordering::Relaxed), 3);
1847        }
1848
1849        /// ID: pkg-spine-unknown
1850        ///
1851        /// The package document contains a spine item with unknown properties. The reading system must open the EPUB successfully.
1852        #[test]
1853        fn test_pkg_spine_unknown() {
1854            let epub_file = Path::new("./test_case/pkg-spine-unknown.epub");
1855            let doc = EpubDoc::new(epub_file);
1856            assert!(doc.is_ok());
1857
1858            let doc = doc.unwrap();
1859            assert_eq!(doc.spine.len(), 1);
1860            assert_eq!(doc.spine[0].idref, "content_001");
1861            assert_eq!(doc.spine[0].id, None);
1862            assert_eq!(doc.spine[0].linear, true);
1863            assert_eq!(doc.spine[0].properties, Some("untrustworthy".to_string()));
1864        }
1865
1866        /// ID: pkg-title-order
1867        ///
1868        /// Several titles are listed in the package document. The reading system must use the first title (and whether to use other titles is not defined).
1869        #[test]
1870        fn test_pkg_title_order() {
1871            let epub_file = Path::new("./test_case/pkg-title-order.epub");
1872            let doc = EpubDoc::new(epub_file);
1873            assert!(doc.is_ok());
1874
1875            let doc = doc.unwrap();
1876            let title_list = doc.get_title();
1877            assert_eq!(title_list.len(), 6);
1878            assert_eq!(
1879                title_list,
1880                vec![
1881                    "pkg-title-order",
1882                    "This title must not display first",
1883                    "Also, this title must not display first",
1884                    "This title also must not display first",
1885                    "This title must also not display first",
1886                    "This title must not display first, also",
1887                ]
1888            );
1889        }
1890
1891        /// ID: pkg-unique-id
1892        ///
1893        /// The package document's dc:identifier is identical across two publications. The reading system should display both publications independently.
1894        #[test]
1895        fn test_pkg_unique_id() {
1896            let epub_file = Path::new("./test_case/pkg-unique-id.epub");
1897            let doc_1 = EpubDoc::new(epub_file);
1898            assert!(doc_1.is_ok());
1899
1900            let epub_file = Path::new("./test_case/pkg-unique-id_duplicate.epub");
1901            let doc_2 = EpubDoc::new(epub_file);
1902            assert!(doc_2.is_ok());
1903
1904            let doc_1 = doc_1.unwrap();
1905            let doc_2 = doc_2.unwrap();
1906
1907            assert_eq!(doc_1.get_identifier(), doc_2.get_identifier());
1908            assert_eq!(doc_1.unique_identifier, "pkg-unique-id");
1909            assert_eq!(doc_2.unique_identifier, "pkg-unique-id");
1910        }
1911
1912        /// ID: pkg-version-backward
1913        ///
1914        /// “Reading Systems MUST attempt to process an EPUB Publication whose Package Document version attribute is less than "3.0"”. This is an EPUB with package version attribute set to "0", to see if a reading system will open it.
1915        #[test]
1916        fn test_pkg_version_backward() {
1917            let epub_file = Path::new("./test_case/pkg-version-backward.epub");
1918            let doc = EpubDoc::new(epub_file);
1919            assert!(doc.is_ok());
1920
1921            let doc = doc.unwrap();
1922            assert_eq!(doc.version, EpubVersion::Version3_0);
1923        }
1924
1925        /// ID: pkg-linked-records
1926        ///
1927        /// Reading System must process and display the title and creator metadata from the package document. An ONIX 3.0 format linked metadata record exists, but contains neither title nor creator metadata.
1928        #[test]
1929        fn test_pkg_linked_records() {
1930            let epub_file = Path::new("./test_case/pkg-linked-records.epub");
1931            let doc = EpubDoc::new(epub_file);
1932            assert!(doc.is_ok());
1933
1934            let doc = doc.unwrap();
1935            assert_eq!(doc.metadata_link.len(), 3);
1936
1937            let item = doc.metadata_link.iter().find(|&item| {
1938                if let Some(properties) = &item.properties {
1939                    properties.eq("onix")
1940                } else {
1941                    false
1942                }
1943            });
1944            assert!(item.is_some());
1945        }
1946
1947        /// ID: pkg-manifest-unlisted-resource
1948        ///
1949        /// The XHTML content references an image that does not appear in the manifest. The image should not be shown.
1950        #[test]
1951        fn test_pkg_manifest_unlisted_resource() {
1952            let epub_file = Path::new("./test_case/pkg-manifest-unlisted-resource.epub");
1953            let doc = EpubDoc::new(epub_file);
1954            assert!(doc.is_ok());
1955
1956            let doc = doc.unwrap();
1957            assert!(
1958                doc.get_manifest_item_by_path("EPUB/content_001.xhtml")
1959                    .is_ok()
1960            );
1961
1962            assert!(doc.get_manifest_item_by_path("EPUB/red.png").is_err());
1963            let err = doc.get_manifest_item_by_path("EPUB/red.png").unwrap_err();
1964            assert_eq!(
1965                err.to_string(),
1966                "Resource not found: Unable to find resource from \"EPUB/red.png\"."
1967            );
1968        }
1969    }
1970
1971    /// Section 3.4 manifest fallbacks
1972    ///
1973    /// The tests under this module seem to favor the reading system rather than the EPUB format itself
1974    mod manifest_fallbacks_tests {
1975        use std::path::Path;
1976
1977        use crate::epub::EpubDoc;
1978
1979        /// ID: pub-foreign_bad-fallback
1980        ///
1981        /// This is a test of manifest fallbacks where both the spine item and the fallback are likely to be unsupported. The spine item is a DMG, with a fallback to a PSD file. Reading systems may raise an error on the ingenstion workflow.
1982        #[test]
1983        fn test_pub_foreign_bad_fallback() {
1984            let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
1985            let doc = EpubDoc::new(epub_file);
1986            assert!(doc.is_ok());
1987
1988            let doc = doc.unwrap();
1989            assert!(doc.get_manifest_item("content_001").is_ok());
1990            assert!(doc.get_manifest_item("bar").is_ok());
1991
1992            assert_eq!(
1993                doc.get_manifest_item_with_fallback("content_001", &vec!["application/xhtml+xml"])
1994                    .unwrap_err()
1995                    .to_string(),
1996                "No supported file format: The fallback resource does not contain the file format you support."
1997            );
1998        }
1999
2000        /// ID: pub-foreign_image
2001        ///
2002        /// An HTML content file contains a PSD image, with a manifest fallback to a PNG image. This tests fallbacks for resources that are not in the spine.
2003        #[test]
2004        fn test_pub_foreign_image() {
2005            let epub_file = Path::new("./test_case/pub-foreign_image.epub");
2006            let doc = EpubDoc::new(epub_file);
2007            assert!(doc.is_ok());
2008
2009            let doc = doc.unwrap();
2010            let result = doc.get_manifest_item_with_fallback(
2011                "image-tiff",
2012                &vec!["image/png", "application/xhtml+xml"],
2013            );
2014            assert!(result.is_ok());
2015
2016            let (_, mime) = result.unwrap();
2017            assert_eq!(mime, "image/png");
2018        }
2019
2020        /// ID: pub-foreign_json-spine
2021        ///
2022        /// This EPUB uses a JSON content file in the spine, with a manifest fallback to an HTML document. If the reading system does not support JSON, it should display the HTML.
2023        #[test]
2024        fn test_pub_foreign_json_spine() {
2025            let epub_file = Path::new("./test_case/pub-foreign_json-spine.epub");
2026            let doc = EpubDoc::new(epub_file);
2027            assert!(doc.is_ok());
2028
2029            let doc = doc.unwrap();
2030            let result = doc.get_manifest_item_with_fallback(
2031                "content_primary",
2032                &vec!["application/xhtml+xml", "application/json"],
2033            );
2034            assert!(result.is_ok());
2035            let (_, mime) = result.unwrap();
2036            assert_eq!(mime, "application/json");
2037
2038            let result = doc
2039                .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
2040            assert!(result.is_ok());
2041            let (_, mime) = result.unwrap();
2042            assert_eq!(mime, "application/xhtml+xml");
2043        }
2044
2045        /// ID: pub-foreign_xml-spine
2046        ///
2047        /// This EPUB uses an ordinary XML content file with mimetype application/xml in the spine, with a manifest fallback to an HTML document. If the reading system does not support XML, it should display the HTML.
2048        #[test]
2049        fn test_pub_foreign_xml_spine() {
2050            let epub_file = Path::new("./test_case/pub-foreign_xml-spine.epub");
2051            let doc = EpubDoc::new(epub_file);
2052            assert!(doc.is_ok());
2053
2054            let doc = doc.unwrap();
2055            let result = doc.get_manifest_item_with_fallback(
2056                "content_primary",
2057                &vec!["application/xhtml+xml", "application/xml"],
2058            );
2059            assert!(result.is_ok());
2060            let (_, mime) = result.unwrap();
2061            assert_eq!(mime, "application/xml");
2062
2063            let result = doc
2064                .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
2065            assert!(result.is_ok());
2066            let (_, mime) = result.unwrap();
2067            assert_eq!(mime, "application/xhtml+xml");
2068        }
2069
2070        /// ID: pub-foreign_xml-suffix-spine
2071        ///
2072        /// This EPUB uses an custom XML content file with mimetype application/dtc+xml in the spine, with a manifest fallback to an HTML document. If the reading system does not support XML, it should display the HTML.
2073        #[test]
2074        fn test_pub_foreign_xml_suffix_spine() {
2075            let epub_file = Path::new("./test_case/pub-foreign_xml-suffix-spine.epub");
2076            let doc = EpubDoc::new(epub_file);
2077            assert!(doc.is_ok());
2078
2079            let doc = doc.unwrap();
2080            let result = doc.get_manifest_item_with_fallback(
2081                "content_primary",
2082                &vec!["application/xhtml+xml", "application/dtc+xml"],
2083            );
2084            assert!(result.is_ok());
2085            let (_, mime) = result.unwrap();
2086            assert_eq!(mime, "application/dtc+xml");
2087
2088            let result = doc
2089                .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
2090            assert!(result.is_ok());
2091            let (_, mime) = result.unwrap();
2092            assert_eq!(mime, "application/xhtml+xml");
2093        }
2094    }
2095
2096    /// Section 3.9 open container format
2097    mod open_container_format_tests {
2098        use std::{cmp::min, io::Read, path::Path};
2099
2100        use sha1::{Digest, Sha1};
2101
2102        use crate::epub::EpubDoc;
2103
/// ID: ocf-metainf-inc
///
/// The META-INF folder holds an extra configuration file that is not in the reserved files' list; it must be ignored and the EPUB must still open.
#[test]
fn test_ocf_metainf_inc() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-metainf-inc.epub"));

    assert!(opened.is_ok());
}
2113
/// ID: ocf-metainf-manifest
///
/// The META-INF directory holds an ancillary manifest file with an extra spine item; the reading system must ignore that extra item. This test only checks that the EPUB opens.
#[test]
fn test_ocf_metainf_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-metainf-manifest.epub"));

    assert!(opened.is_ok());
}
2123
/// ID: ocf-package_arbitrary
///
/// The EPUB ships three valid package files with three matching sets of content documents, but container.xml references only one of them, located in an unusual subdirectory. The reading system must use that package.
#[test]
fn test_ocf_package_arbitrary() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_arbitrary.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.package_path, Path::new("FOO/BAR/package.opf"));
}
2136
/// ID: ocf-package_multiple
///
/// The EPUB ships three valid package files with three matching sets of content documents, and container.xml references all of them. The reading system must use the first package.
#[test]
fn test_ocf_package_multiple() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_multiple.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.package_path, Path::new("FOO/BAR/package.opf"));
    assert_eq!(book.base_path, Path::new("FOO/BAR"));
}
2150
/// ID: ocf-url_link-leaking-relative
///
/// The content links to a photograph through a relative path with several double-dot segments. The photograph's folder hierarchy starts at the root level, and the relative reference climbs above the depth of that hierarchy, so opening must fail.
#[test]
fn test_ocf_url_link_leaking_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-leaking-relative.epub"));
    assert!(opened.is_err());

    let message = opened.err().unwrap().to_string();
    assert_eq!(
        message,
        String::from(
            "Relative link leakage: Path \"../../../../media/imgs/monastery.jpg\" is out of container range."
        )
    );
}
2166
/// ID: ocf-url_link-path-absolute
///
/// The content links to a photograph through a path-absolute link, i.e. one beginning with a leading slash. The photograph's folder hierarchy starts at the root level.
#[test]
fn test_ocf_url_link_path_absolute() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-path-absolute.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let photo = book.manifest.get("photo").unwrap();
    assert_eq!(photo.path, Path::new("media/imgs/monastery.jpg"));
}
2180
/// ID: ocf-url_link-relative
///
/// The content links to a photograph through a simple relative link. The photograph's folder hierarchy starts at the root level.
#[test]
fn test_ocf_url_link_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-relative.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let photo = book.manifest.get("photo").unwrap();
    assert_eq!(photo.path, Path::new("media/imgs/monastery.jpg"));
}
2194
/// ID: ocf-url_manifest
///
/// The manifest points at an XHTML file in an arbitrary subfolder. The reading system must be able to find the content.
#[test]
fn test_ocf_url_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_manifest.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert!(book.get_manifest_item("nav").is_ok());
    assert!(book.get_manifest_item("content_001").is_ok());
    assert!(book.get_manifest_item("content_002").is_err());
}
2209
/// ID: ocf-url_relative
///
/// The manifest points at an XHTML file in an arbitrary subfolder that is relative to the package's own arbitrary folder. The reading system must be able to find the content.
#[test]
fn test_ocf_url_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_relative.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // The package document location and the base path derived from it.
    assert_eq!(book.package_path, Path::new("foo/BAR/baz.opf"));
    assert_eq!(book.base_path, Path::new("foo/BAR"));

    // Manifest paths are expected relative to the package folder.
    let nav = book.manifest.get("nav").unwrap();
    assert_eq!(nav.path, Path::new("foo/BAR/nav.xhtml"));
    let content = book.manifest.get("content_001").unwrap();
    assert_eq!(content.path, Path::new("foo/BAR/qux/content_001.xhtml"));

    assert!(book.get_manifest_item("nav").is_ok());
    assert!(book.get_manifest_item("content_001").is_ok());
}
2233
2234        /// ID: ocf-zip-comp
2235        ///
2236        /// MUST treat any OCF ZIP container that uses compression techniques other than Deflate as in error.
2237        /// This test case does not use compression methods other than Deflate and cannot detect whether it is effective.
2238        #[test]
2239        fn test_ocf_zip_comp() {
2240            let epub_file = Path::new("./test_case/ocf-zip-comp.epub");
2241            let doc = EpubDoc::new(epub_file);
2242            assert!(doc.is_ok());
2243        }
2244
2245        /// ID: ocf-zip-mult
2246        ///
2247        /// MUST treat any OCF ZIP container that splits the content into segments as in error.
2248        /// This test case is not a segmented OCF ZIP container and cannot be tested to see if it is valid.
2249        #[test]
2250        fn test_ocf_zip_mult() {
2251            let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2252            let doc = EpubDoc::new(epub_file);
2253            assert!(doc.is_ok());
2254        }
2255
2256        /// ID: ocf-font_obfuscation
2257        ///
2258        /// An obfuscated (TrueType) font should be displayed after de-obfuscation.
2259        #[test]
2260        fn test_ocf_font_obfuscation() {
2261            let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2262            let doc = EpubDoc::new(epub_file);
2263            assert!(doc.is_ok());
2264
2265            let doc = doc.unwrap();
2266            let unique_id = doc.unique_identifier.clone();
2267
2268            let mut hasher = Sha1::new();
2269            hasher.update(unique_id.as_bytes());
2270            let hash = hasher.finalize();
2271            let mut key = vec![0u8; 1040];
2272            for i in 0..1040 {
2273                key[i] = hash[i % hash.len()];
2274            }
2275
2276            assert!(doc.encryption.is_some());
2277            assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2278
2279            let data = &doc.encryption.unwrap()[0];
2280            assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2281
2282            let font_file = doc
2283                .archive
2284                .lock()
2285                .unwrap()
2286                .by_name(&data.data)
2287                .unwrap()
2288                .bytes()
2289                .collect::<Result<Vec<u8>, _>>();
2290            assert!(font_file.is_ok());
2291            let font_file = font_file.unwrap();
2292
2293            // 根据EPUB规范，字体混淆是直接对字体文件进行的，不需要解压步骤，直接进行去混淆处理
2294            let mut deobfuscated = font_file.clone();
2295            for i in 0..min(1040, deobfuscated.len()) {
2296                deobfuscated[i] ^= key[i];
2297            }
2298
2299            assert!(is_valid_font(&deobfuscated));
2300        }
2301
2302        /// ID: ocf-font_obfuscation-bis
2303        ///
2304        /// An obfuscated (TrueType) font should not be displayed after de-obfuscation, because the obfuscation used a different publication id.
2305        #[test]
2306        fn test_ocf_font_obfuscation_bis() {
2307            let epub_file = Path::new("./test_case/ocf-font_obfuscation_bis.epub");
2308            let doc = EpubDoc::new(epub_file);
2309            assert!(doc.is_ok());
2310
2311            let doc = doc.unwrap();
2312
2313            let wrong_unique_id = "wrong-publication-id";
2314            let mut hasher = Sha1::new();
2315            hasher.update(wrong_unique_id.as_bytes());
2316            let hash = hasher.finalize();
2317            let mut wrong_key = vec![0u8; 1040];
2318            for i in 0..1040 {
2319                wrong_key[i] = hash[i % hash.len()];
2320            }
2321
2322            assert!(doc.encryption.is_some());
2323            assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2324
2325            let data = &doc.encryption.unwrap()[0];
2326            assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2327
2328            let font_file = doc
2329                .archive
2330                .lock()
2331                .unwrap()
2332                .by_name(&data.data)
2333                .unwrap()
2334                .bytes()
2335                .collect::<Result<Vec<u8>, _>>();
2336            assert!(font_file.is_ok());
2337            let font_file = font_file.unwrap();
2338
2339            // 使用错误的密钥进行去混淆
2340            let mut deobfuscated_with_wrong_key = font_file.clone();
2341            for i in 0..std::cmp::min(1040, deobfuscated_with_wrong_key.len()) {
2342                deobfuscated_with_wrong_key[i] ^= wrong_key[i];
2343            }
2344
2345            assert!(!is_valid_font(&deobfuscated_with_wrong_key));
2346        }
2347
2348        fn is_valid_font(data: &[u8]) -> bool {
2349            if data.len() < 4 {
2350                return false;
2351            }
2352            let sig = &data[0..4];
2353            // OTF: "OTTO"
2354            // TTF: 0x00010000, 0x00020000, "true", "typ1"
2355            sig == b"OTTO"
2356                || sig == b"\x00\x01\x00\x00"
2357                || sig == b"\x00\x02\x00\x00"
2358                || sig == b"true"
2359                || sig == b"typ1"
2360        }
2361    }
2362
2363    #[test]
2364    fn test_parse_container() {
2365        let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2366        let doc = EpubDoc::new(epub_file);
2367        assert!(doc.is_ok());
2368
2369        // let doc = doc.unwrap();
2370        let container = r#"
2371        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2372            <rootfiles></rootfiles>
2373        </container>
2374        "#
2375        .to_string();
2376
2377        let result = EpubDoc::<BufReader<File>>::parse_container(container);
2378        assert!(result.is_err());
2379        assert_eq!(
2380            result.unwrap_err(),
2381            EpubError::NonCanonicalFile { tag: "rootfile".to_string() }
2382        );
2383
2384        let container = r#"
2385        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2386            <rootfiles>
2387                <rootfile media-type="application/oebps-package+xml"/>
2388            </rootfiles>
2389        </container>
2390        "#
2391        .to_string();
2392
2393        let result = EpubDoc::<BufReader<File>>::parse_container(container);
2394        assert!(result.is_err());
2395        assert_eq!(
2396            result.unwrap_err(),
2397            EpubError::MissingRequiredAttribute {
2398                tag: "rootfile".to_string(),
2399                attribute: "full-path".to_string(),
2400            }
2401        );
2402
2403        let container = r#"
2404        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2405            <rootfiles>
2406                <rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
2407            </rootfiles>
2408        </container>
2409        "#
2410        .to_string();
2411
2412        let result = EpubDoc::<BufReader<File>>::parse_container(container);
2413        assert!(result.is_ok());
2414        assert_eq!(result.unwrap(), PathBuf::from("EPUB/content.opf"))
2415    }
2416
2417    #[test]
2418    fn test_parse_manifest() {
2419        let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
2420        let doc = EpubDoc::new(epub_file);
2421        assert!(doc.is_ok());
2422
2423        let manifest = r#"
2424        <manifest>
2425            <item href="content_001.xhtml" media-type="application/xhtml+xml"/>
2426            <item properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2427        </manifest>
2428        "#;
2429        let mut doc = doc.unwrap();
2430        let element = XmlReader::parse(manifest);
2431        assert!(element.is_ok());
2432
2433        let element = element.unwrap();
2434        let result = doc.parse_manifest(&element);
2435        assert!(result.is_err());
2436        assert_eq!(
2437            result.unwrap_err(),
2438            EpubError::MissingRequiredAttribute {
2439                tag: "item".to_string(),
2440                attribute: "id".to_string(),
2441            },
2442        );
2443
2444        let manifest = r#"
2445        <manifest>
2446            <item id="content_001" media-type="application/xhtml+xml"/>
2447            <item id="nav" properties="nav" media-type="application/xhtml+xml"/>
2448        </manifest>
2449        "#;
2450        let element = XmlReader::parse(manifest);
2451        assert!(element.is_ok());
2452
2453        let element = element.unwrap();
2454        let result = doc.parse_manifest(&element);
2455        assert!(result.is_err());
2456        assert_eq!(
2457            result.unwrap_err(),
2458            EpubError::MissingRequiredAttribute {
2459                tag: "item".to_string(),
2460                attribute: "href".to_string(),
2461            },
2462        );
2463
2464        let manifest = r#"
2465        <manifest>
2466            <item id="content_001" href="content_001.xhtml"/>
2467            <item id="nav" properties="nav" href="nav.xhtml"/>
2468        </manifest>
2469        "#;
2470        let element = XmlReader::parse(manifest);
2471        assert!(element.is_ok());
2472
2473        let element = element.unwrap();
2474        let result = doc.parse_manifest(&element);
2475        assert!(result.is_err());
2476        assert_eq!(
2477            result.unwrap_err(),
2478            EpubError::MissingRequiredAttribute {
2479                tag: "item".to_string(),
2480                attribute: "media-type".to_string(),
2481            },
2482        );
2483
2484        let manifest = r#"
2485        <manifest>
2486            <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
2487            <item id="nav" properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2488        </manifest>
2489        "#;
2490        let element = XmlReader::parse(manifest);
2491        assert!(element.is_ok());
2492
2493        let element = element.unwrap();
2494        let result = doc.parse_manifest(&element);
2495        assert!(result.is_ok());
2496    }
2497
2498    /// Test for function `has_encryption`
2499    #[test]
2500    fn test_fn_has_encryption() {
2501        let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2502        let doc = EpubDoc::new(epub_file);
2503        assert!(doc.is_ok());
2504
2505        let doc = doc.unwrap();
2506        assert!(doc.has_encryption());
2507    }
2508
2509    /// This test is used to detect whether the "META-INF/encryption.xml" file is parsed correctly
2510    #[test]
2511    fn test_fn_parse_encryption() {
2512        let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2513        let doc = EpubDoc::new(epub_file);
2514        assert!(doc.is_ok());
2515
2516        let doc = doc.unwrap();
2517        assert!(doc.encryption.is_some());
2518
2519        let encryption = doc.encryption.unwrap();
2520        assert_eq!(encryption.len(), 1);
2521        assert_eq!(encryption[0].method, "http://www.idpf.org/2008/embedding");
2522        assert_eq!(encryption[0].data, "EPUB/fonts/Lobster.ttf");
2523    }
2524
2525    #[test]
2526    fn test_get_metadata_existing_key() {
2527        let epub_file = Path::new("./test_case/epub-33.epub");
2528        let doc = EpubDoc::new(epub_file);
2529        assert!(doc.is_ok());
2530
2531        let doc = doc.unwrap();
2532
2533        let titles = doc.get_metadata("title");
2534        assert!(titles.is_some());
2535
2536        let titles = titles.unwrap();
2537        assert_eq!(titles.len(), 1);
2538        assert_eq!(titles[0].property, "title");
2539        assert_eq!(titles[0].value, "EPUB 3.3");
2540
2541        let languages = doc.get_metadata("language");
2542        assert!(languages.is_some());
2543
2544        let languages = languages.unwrap();
2545        assert_eq!(languages.len(), 1);
2546        assert_eq!(languages[0].property, "language");
2547        assert_eq!(languages[0].value, "en-us");
2548
2549        let language = doc.get_language();
2550        assert_eq!(language, vec!["en-us"]);
2551    }
2552
2553    #[test]
2554    fn test_get_metadata_nonexistent_key() {
2555        let epub_file = Path::new("./test_case/epub-33.epub");
2556        let doc = EpubDoc::new(epub_file);
2557        assert!(doc.is_ok());
2558
2559        let doc = doc.unwrap();
2560        let metadata = doc.get_metadata("nonexistent");
2561        assert!(metadata.is_none());
2562    }
2563
2564    #[test]
2565    fn test_get_metadata_multiple_items_same_type() {
2566        let epub_file = Path::new("./test_case/epub-33.epub");
2567        let doc = EpubDoc::new(epub_file);
2568        assert!(doc.is_ok());
2569
2570        let doc = doc.unwrap();
2571
2572        let creators = doc.get_metadata("creator");
2573        assert!(creators.is_some());
2574
2575        let creators = creators.unwrap();
2576        assert_eq!(creators.len(), 3);
2577
2578        assert_eq!(creators[0].id, Some("creator_id_0".to_string()));
2579        assert_eq!(creators[0].property, "creator");
2580        assert_eq!(creators[0].value, "Matt Garrish, DAISY Consortium");
2581
2582        assert_eq!(creators[1].id, Some("creator_id_1".to_string()));
2583        assert_eq!(creators[1].property, "creator");
2584        assert_eq!(creators[1].value, "Ivan Herman, W3C");
2585
2586        assert_eq!(creators[2].id, Some("creator_id_2".to_string()));
2587        assert_eq!(creators[2].property, "creator");
2588        assert_eq!(creators[2].value, "Dave Cramer, Invited Expert");
2589    }
2590
2591    #[test]
2592    fn test_get_metadata_with_refinement() {
2593        let epub_file = Path::new("./test_case/epub-33.epub");
2594        let doc = EpubDoc::new(epub_file);
2595        assert!(doc.is_ok());
2596
2597        let doc = doc.unwrap();
2598
2599        let title = doc.get_metadata("title");
2600        assert!(title.is_some());
2601
2602        let title = title.unwrap();
2603        assert_eq!(title.len(), 1);
2604        assert_eq!(title[0].refined.len(), 1);
2605        assert_eq!(title[0].refined[0].property, "title-type");
2606        assert_eq!(title[0].refined[0].value, "main");
2607    }
2608
2609    #[test]
2610    fn test_get_manifest_item_with_fallback() {
2611        let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
2612        let doc = EpubDoc::new(epub_file);
2613        assert!(doc.is_ok());
2614
2615        let doc = doc.unwrap();
2616        assert!(doc.get_manifest_item("content_001").is_ok());
2617        assert!(doc.get_manifest_item("bar").is_ok());
2618
2619        // 当回退链上存在可回退资源时能获取资源
2620        if let Ok((_, mime)) =
2621            doc.get_manifest_item_with_fallback("content_001", &vec!["image/psd"])
2622        {
2623            assert_eq!(mime, "image/psd");
2624        } else {
2625            assert!(false, "get_manifest_item_with_fallback failed");
2626        }
2627
2628        // 当回退链上不存在可回退资源时无法获取资源
2629        assert_eq!(
2630            doc.get_manifest_item_with_fallback("content_001", &vec!["application/xhtml+xml"])
2631                .unwrap_err()
2632                .to_string(),
2633            "No supported file format: The fallback resource does not contain the file format you support."
2634        );
2635    }
2636
2637    #[test]
2638    fn test_get_cover() {
2639        let epub_file = Path::new("./test_case/pkg-cover-image.epub");
2640        let doc = EpubDoc::new(epub_file);
2641        if let Err(err) = &doc {
2642            println!("{}", err);
2643        }
2644        assert!(doc.is_ok());
2645
2646        let doc = doc.unwrap();
2647        let result = doc.get_cover();
2648        assert!(result.is_some());
2649
2650        let (data, mime) = result.unwrap();
2651        assert_eq!(data.len(), 5785);
2652        assert_eq!(mime, "image/jpeg");
2653    }
2654
2655    #[test]
2656    fn test_epub_2() {
2657        let epub_file = Path::new("./test_case/epub-2.epub");
2658        let doc = EpubDoc::new(epub_file);
2659        assert!(doc.is_ok());
2660
2661        let doc = doc.unwrap();
2662
2663        let titles = doc.get_title();
2664        assert_eq!(titles, vec!["Minimal EPUB 2.0"]);
2665    }
2666
2667    #[test]
2668    fn test_is_valid_epub_valid_file() {
2669        let result = EpubDoc::is_valid_epub("./test_case/epub-2.epub");
2670        assert!(result.is_ok());
2671        assert_eq!(result.unwrap(), true);
2672    }
2673
2674    #[test]
2675    fn test_is_valid_epub_invalid_path() {
2676        let result = EpubDoc::is_valid_epub("./test_case/nonexistent.epub");
2677        assert!(result.is_err());
2678    }
2679
2680    #[test]
2681    fn test_is_valid_epub_corrupted_zip() {
2682        let temp_dir = std::env::temp_dir();
2683        let corrupted_file = temp_dir.join("corrupted.epub");
2684
2685        std::fs::write(&corrupted_file, b"not a valid zip file").unwrap();
2686
2687        let result = EpubDoc::is_valid_epub(&corrupted_file);
2688
2689        assert!(result.is_err());
2690        let err = result.unwrap_err();
2691        assert!(matches!(err, EpubError::ArchiveError { .. }));
2692
2693        std::fs::remove_file(corrupted_file).ok();
2694    }
2695
2696    #[test]
2697    fn test_is_valid_epub_valid_epub_3() {
2698        let result = EpubDoc::is_valid_epub("./test_case/epub-33.epub");
2699        assert!(result.is_ok());
2700        assert_eq!(result.unwrap(), true);
2701    }
2702
2703    #[test]
2704    fn test_is_outside_error() {
2705        let archive_error = EpubError::ArchiveError {
2706            source: zip::result::ZipError::Io(std::io::Error::new(
2707                std::io::ErrorKind::Other,
2708                "test",
2709            )),
2710        };
2711        assert!(EpubDoc::<BufReader<File>>::is_outside_error(&archive_error));
2712
2713        let io_error = EpubError::IOError {
2714            source: std::io::Error::new(std::io::ErrorKind::NotFound, "test"),
2715        };
2716        assert!(EpubDoc::<BufReader<File>>::is_outside_error(&io_error));
2717
2718        let non_canonical = EpubError::NonCanonicalEpub { expected_file: "test".to_string() };
2719        assert!(!EpubDoc::<BufReader<File>>::is_outside_error(
2720            &non_canonical
2721        ));
2722
2723        let missing_attr = EpubError::MissingRequiredAttribute {
2724            tag: "test".to_string(),
2725            attribute: "id".to_string(),
2726        };
2727        assert!(!EpubDoc::<BufReader<File>>::is_outside_error(&missing_attr));
2728    }
2729
2730    mod metadata_sheet_tests {
2731        use crate::epub::EpubDoc;
2732        use std::path::Path;
2733
2734        #[test]
2735        fn test_get_metadata_sheet_basic_fields() {
2736            let epub_file = Path::new("./test_case/epub-33.epub");
2737            let doc = EpubDoc::new(epub_file);
2738            assert!(doc.is_ok());
2739
2740            let doc = doc.unwrap();
2741            let sheet = doc.get_metadata_sheet();
2742
2743            assert_eq!(sheet.title.len(), 1);
2744            assert_eq!(sheet.title[0], "EPUB 3.3");
2745
2746            assert_eq!(sheet.language.len(), 1);
2747            assert_eq!(sheet.language[0], "en-us");
2748
2749            assert_eq!(sheet.publisher, "World Wide Web Consortium");
2750
2751            assert_eq!(
2752                sheet.rights,
2753                "https://www.w3.org/Consortium/Legal/2015/doc-license"
2754            );
2755        }
2756
2757        #[test]
2758        fn test_get_metadata_sheet_multiple_creators() {
2759            let epub_file = Path::new("./test_case/epub-33.epub");
2760            let doc = EpubDoc::new(epub_file);
2761            assert!(doc.is_ok());
2762
2763            let doc = doc.unwrap();
2764            let sheet = doc.get_metadata_sheet();
2765
2766            assert_eq!(sheet.creator.len(), 3);
2767            assert_eq!(sheet.creator[0], "Matt Garrish, DAISY Consortium");
2768            assert_eq!(sheet.creator[1], "Ivan Herman, W3C");
2769            assert_eq!(sheet.creator[2], "Dave Cramer, Invited Expert");
2770        }
2771
2772        #[test]
2773        fn test_get_metadata_sheet_multiple_subjects() {
2774            let epub_file = Path::new("./test_case/epub-33.epub");
2775            let doc = EpubDoc::new(epub_file);
2776            assert!(doc.is_ok());
2777
2778            let doc = doc.unwrap();
2779            let sheet = doc.get_metadata_sheet();
2780
2781            assert_eq!(sheet.subject.len(), 2);
2782            assert_eq!(sheet.subject[0], "Information systems~World Wide Web");
2783            assert_eq!(
2784                sheet.subject[1],
2785                "General and reference~Computing standards, RFCs and guidelines"
2786            );
2787        }
2788
2789        #[test]
2790        fn test_get_metadata_sheet_identifier_with_id() {
2791            let epub_file = Path::new("./test_case/epub-33.epub");
2792            let doc = EpubDoc::new(epub_file);
2793            assert!(doc.is_ok());
2794
2795            let doc = doc.unwrap();
2796            let sheet = doc.get_metadata_sheet();
2797
2798            assert!(sheet.identifier.contains_key("pub-id"));
2799            assert_eq!(
2800                sheet.identifier.get("pub-id"),
2801                Some(&"https://www.w3.org/TR/epub-33/".to_string())
2802            );
2803        }
2804
2805        #[test]
2806        fn test_get_metadata_sheet_missing_scalar_fields() {
2807            let epub_file = Path::new("./test_case/epub-33.epub");
2808            let doc = EpubDoc::new(epub_file);
2809            assert!(doc.is_ok());
2810
2811            let doc = doc.unwrap();
2812            let sheet = doc.get_metadata_sheet();
2813
2814            assert!(sheet.coverage.is_empty());
2815            assert!(sheet.description.is_empty());
2816            assert!(sheet.format.is_empty());
2817            assert!(sheet.source.is_empty());
2818            assert!(sheet.epub_type.is_empty());
2819            assert!(sheet.contributor.is_empty());
2820            assert!(sheet.relation.is_empty());
2821        }
2822
2823        #[test]
2824        fn test_get_metadata_sheet_title_refinement_via_get_metadata() {
2825            let epub_file = Path::new("./test_case/epub-33.epub");
2826            let doc = EpubDoc::new(epub_file);
2827            assert!(doc.is_ok());
2828
2829            let doc = doc.unwrap();
2830            let title_metadata = doc.get_metadata("title");
2831            assert!(title_metadata.is_some());
2832
2833            let title_metadata = title_metadata.unwrap();
2834            assert_eq!(title_metadata.len(), 1);
2835            assert_eq!(title_metadata[0].refined.len(), 1);
2836            assert_eq!(title_metadata[0].refined[0].property, "title-type");
2837            assert_eq!(title_metadata[0].refined[0].value, "main");
2838
2839            let sheet = doc.get_metadata_sheet();
2840            assert_eq!(sheet.title.len(), 1);
2841            assert_eq!(sheet.title[0], "EPUB 3.3");
2842        }
2843
2844        #[test]
2845        fn test_get_metadata_sheet_ignores_unknown_properties() {
2846            let epub_file = Path::new("./test_case/epub-33.epub");
2847            let doc = EpubDoc::new(epub_file);
2848            assert!(doc.is_ok());
2849
2850            let doc = doc.unwrap();
2851            let sheet = doc.get_metadata_sheet();
2852
2853            assert_eq!(sheet.title.len(), 1);
2854            assert_eq!(sheet.creator.len(), 3);
2855            assert_eq!(sheet.subject.len(), 2);
2856        }
2857
2858        #[test]
2859        fn test_get_metadata_sheet_idempotent() {
2860            let epub_file = Path::new("./test_case/epub-33.epub");
2861            let doc = EpubDoc::new(epub_file);
2862            assert!(doc.is_ok());
2863
2864            let doc = doc.unwrap();
2865            let sheet1 = doc.get_metadata_sheet();
2866            let sheet2 = doc.get_metadata_sheet();
2867
2868            assert_eq!(sheet1.title, sheet2.title);
2869            assert_eq!(sheet1.creator, sheet2.creator);
2870            assert_eq!(sheet1.language, sheet2.language);
2871            assert_eq!(sheet1.identifier, sheet2.identifier);
2872            assert_eq!(sheet1.date, sheet2.date);
2873        }
2874    }
2875}
lib_epub/epub.rs

lib_epub/
epub.rs