Skip to main content

lib_epub/
epub.rs

1//! The core module of the EPUB parsing library
2//!
3//! This module provides complete parsing functionality for EPUB ebook files
4//! and is the core component of the entire library. The `EpubDoc` structure
5//! encapsulates all the parsing logic and data access interfaces for EPUB files.
6//!
7//! ## Main references to EPUB specs:
8//! - <https://www.w3.org/TR/epub-33>
9//! - <https://idpf.org/epub/201>
10//!
11//! ## Potential Issues
12//! - The generic parameter `R: Read + Seek` increases complexity, particularly
13//!   in asynchronous environments. The current design is not conducive to multi-threaded
14//!   concurrent access and requires an external synchronization mechanism.
15//! - Some error handling may not be sufficiently nuanced, and certain edge cases
16//!   may not be adequately considered.
17//! - Loading the entire EPUB document at once may result in significant memory consumption,
18//!   especially for large publications.
19//!
//! ## Future Work
//! - Support more EPUB specification features, such as media overlays and scripting.
22
23use std::{
24    collections::HashMap,
25    fs::{self, File},
26    io::{BufReader, Read, Seek},
27    path::{Path, PathBuf},
28    sync::{
29        Arc, Mutex, OnceLock,
30        atomic::{AtomicUsize, Ordering},
31    },
32};
33
34#[cfg(not(feature = "no-indexmap"))]
35use indexmap::IndexMap;
36use zip::{ZipArchive, result::ZipError};
37
38use crate::{
39    error::EpubError,
40    types::{
41        EncryptionData, EpubVersion, ManifestItem, MetadataItem, MetadataLinkItem,
42        MetadataRefinement, MetadataSheet, NavPoint, SpineItem,
43    },
44    utils::{
45        DecodeBytes, NormalizeWhitespace, XmlElement, XmlReader, adobe_font_dencryption,
46        check_realtive_link_leakage, compression_method_check, get_file_in_zip_archive,
47        idpf_font_dencryption,
48    },
49};
50
/// EPUB document parser, representing a loaded and parsed EPUB publication
///
/// The `EpubDoc` structure is the core of the entire EPUB parsing library.
/// It encapsulates all the parsing logic and data access interfaces for EPUB files.
/// It is responsible for parsing various components of an EPUB, including metadata,
/// manifests, reading order, table of contents navigation, and encrypted information,
/// and provides methods for accessing this data.
///
/// Provides a unified data access interface for EPUB files, hiding the underlying
/// file structure and parsing details. Strictly adheres to the EPUB specification
/// in implementing the parsing logic to ensure compatibility with the standard.
///
/// ## Usage
///
/// ```rust
/// use lib_epub::epub::EpubDoc;
///
/// let doc = EpubDoc::new("./test_case/epub-33.epub");
/// assert!(doc.is_ok());
/// ```
///
/// ## Notes
/// - The `EpubDoc` structure is thread-safe: the archive is shared behind an
///   `Arc<Mutex<_>>`, and the reading position and metadata sheet cache use
///   `AtomicUsize` and `OnceLock` respectively.
/// - The public fields are an in-memory snapshot of the parsed document;
///   modifying them will not modify the actual EPUB file.
pub struct EpubDoc<R: Read + Seek + Send> {
    /// The underlying ZIP archive of the epub file, shared behind a mutex
    pub(crate) archive: Arc<Mutex<ZipArchive<R>>>,

    /// The path to the target epub file
    pub(crate) epub_path: PathBuf,

    /// The path to the OPF file, as declared by `META-INF/container.xml`
    pub package_path: PathBuf,

    /// The path to the directory where the opf file is located
    pub base_path: PathBuf,

    /// The epub version
    pub version: EpubVersion,

    /// The unique identifier of the epub file
    ///
    /// This is the value of the `dc:identifier` element referenced by the
    /// `unique-identifier` attribute of the package. When the package has no
    /// such attribute, the first `dc:identifier` value is used instead.
    pub unique_identifier: String,

    /// Epub metadata extracted from OPF
    pub metadata: Vec<MetadataItem>,

    /// Data in metadata that points to external files
    pub metadata_link: Vec<MetadataLinkItem>,

    /// A list of resources contained inside an epub extracted from OPF
    ///
    /// All resources in the epub file are declared here, and undeclared resources
    /// should not be stored in the epub file and cannot be obtained from it.
    /// Entries are keyed by the manifest item `id`.
    ///
    /// ## Storage Implementation
    ///
    /// By default, this field uses [`IndexMap`] to preserve the original declaration
    /// order from the OPF file, as recommended by the EPUB specification.
    ///
    /// To reduce dependencies, you can enable the `no-indexmap` feature to use
    /// [`HashMap`] instead. Note that this will not preserve the manifest order.
    ///
    /// ## EPUB Specification
    ///
    /// Per the <https://www.w3.org/TR/epub-33/#sec-manifest>:
    ///
    /// > The order of `item` elements within the manifest is significant for
    /// > fallback chain processing and should be preserved when processing
    /// > the publication.
    #[cfg(not(feature = "no-indexmap"))]
    pub manifest: IndexMap<String, ManifestItem>,
    #[cfg(feature = "no-indexmap")]
    pub manifest: HashMap<String, ManifestItem>,

    /// Physical reading order of publications extracted from OPF
    ///
    /// This attribute declares the order in which multiple files
    /// containing published content should be displayed.
    pub spine: Vec<SpineItem>,

    /// The entries of `META-INF/encryption.xml`
    ///
    /// `None` when the file is absent or declares no `EncryptedData` entries.
    pub encryption: Option<Vec<EncryptionData>>,

    /// The navigation data of the epub file
    pub catalog: Vec<NavPoint>,

    /// The title of the catalog
    ///
    /// Taken from the NCX `docTitle` (EPUB 2) or from the heading of the `toc`
    /// navigation element (EPUB 3); empty when neither is present.
    pub catalog_title: String,

    /// The index of the current reading spine
    current_spine_index: AtomicUsize,

    /// Whether the archive contains a `META-INF/encryption.xml` file
    has_encryption: bool,

    /// The metadata sheet cache, built lazily on first access
    metadata_sheet: OnceLock<MetadataSheet>,
}
152
153impl<R: Read + Seek + Send> EpubDoc<R> {
154    /// Creates a new EPUB document instance from a reader
155    ///
156    /// This function is responsible for the core logic of parsing EPUB files,
157    /// including verifying the file format, parsing container information,
158    /// loading the OPF package document, and extracting metadata, manifest,
159    /// reading order, and other core information.
160    ///
161    /// ## Parameters
162    /// - `reader`: The data source that implements the `Read` and `Seek` traits,
163    ///   usually a file or memory buffer
164    /// - `epub_path`: The path to the EPUB file, used for path resolution and validation
165    ///
166    /// ## Return
167    /// - `Ok(EpubDoc<R>)`: The successfully parsed EPUB document object
168    /// - `Err(EpubError)`: Errors encountered during parsing
169    ///
170    /// ## Notes
171    /// - This function assumes the EPUB file structure is valid
172    // TODO: 增加对必需的 metadata 的检查
173    pub fn from_reader(reader: R, epub_path: PathBuf) -> Result<Self, EpubError> {
174        // Parsing process
175        // 1. Verify that the ZIP compression method conforms to the EPUB specification
176        // 2. Parse `META-INF/container.xml` retrieves the location of the OPF file
177        // 3. Parses the OPF file to obtain package documentation information
178        // 4. Extracts version information
179        // 5. Parses metadata, manifest, and spine
180        // 6. Parses encrypted information and directory navigation
181        // 7. Verifies and extracts the unique identifier
182
183        let mut archive = ZipArchive::new(reader).map_err(EpubError::from)?;
184        let epub_path = fs::canonicalize(epub_path)?;
185
186        compression_method_check(&mut archive)?;
187
188        let container =
189            get_file_in_zip_archive(&mut archive, "META-INF/container.xml")?.decode()?;
190        let package_path = Self::parse_container(container)?;
191        let base_path = package_path
192            .parent()
193            .expect("the parent directory of the opf file must exist")
194            .to_path_buf();
195
196        let opf_file = get_file_in_zip_archive(
197            &mut archive,
198            package_path
199                .to_str()
200                .expect("package_path should be valid UTF-8"),
201        )?
202        .decode()?;
203        let package = XmlReader::parse(&opf_file)?;
204
205        let version = Self::determine_epub_version(&package)?;
206        let has_encryption = archive
207            .by_path(Path::new("META-INF/encryption.xml"))
208            .is_ok();
209
210        let mut doc = Self {
211            archive: Arc::new(Mutex::new(archive)),
212            epub_path,
213            package_path,
214            base_path,
215            version,
216            unique_identifier: String::new(),
217            metadata: vec![],
218            metadata_link: vec![],
219
220            #[cfg(feature = "no-indexmap")]
221            manifest: HashMap::new(),
222            #[cfg(not(feature = "no-indexmap"))]
223            manifest: IndexMap::new(),
224
225            spine: vec![],
226            encryption: None,
227            catalog: vec![],
228            catalog_title: String::new(),
229            current_spine_index: AtomicUsize::new(0),
230            has_encryption,
231            metadata_sheet: OnceLock::new(),
232        };
233
234        let metadata_element = package.find_elements_by_name("metadata").next().unwrap();
235        let manifest_element = package.find_elements_by_name("manifest").next().unwrap();
236        let spine_element = package.find_elements_by_name("spine").next().unwrap();
237
238        doc.parse_metadata(metadata_element)?;
239        doc.parse_manifest(manifest_element)?;
240        doc.parse_spine(spine_element)?;
241        doc.parse_encryption()?;
242        doc.parse_catalog()?;
243
244        // 断言必有唯一标识符
245        doc.unique_identifier = if let Some(uid) = package.get_attr("unique-identifier") {
246            doc.metadata.iter().find(|item| {
247                item.property == "identifier" && item.id.as_ref().is_some_and(|id| id == &uid)
248            })
249        } else {
250            doc.metadata
251                .iter()
252                .find(|item| item.property == "identifier")
253        }
254        .map(|item| item.value.clone())
255        .ok_or_else(|| EpubError::NonCanonicalFile { tag: "dc:identifier".to_string() })?;
256
257        Ok(doc)
258    }
259
260    /// Parse the EPUB container file (META-INF/container.xml)
261    ///
262    /// This function parses the container information in the EPUB file 、
263    /// to extract the path to the OPF package file. According to the EPUB
264    /// specification, the `container.xml` file must exist in the `META-INF`
265    /// directory and contain at least one `rootfile` element pointing to
266    /// the main OPF file. When multiple `rootfile` elements exist, the first
267    /// element pointing to the OPF file is used as the default.
268    ///
269    /// ## Parameters
270    /// - `data`: The content string of the container.xml
271    ///
272    /// ## Return
273    /// - `Ok(PathBuf)`: The path to the successfully parsed OPF file
274    /// - `Err(EpubError)`: Errors encountered during parsing
275    fn parse_container(data: String) -> Result<PathBuf, EpubError> {
276        let root = XmlReader::parse(&data)?;
277        let rootfile = root
278            .find_elements_by_name("rootfile")
279            .next()
280            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "rootfile".to_string() })?;
281
282        let attr =
283            rootfile
284                .get_attr("full-path")
285                .ok_or_else(|| EpubError::MissingRequiredAttribute {
286                    tag: "rootfile".to_string(),
287                    attribute: "full-path".to_string(),
288                })?;
289
290        Ok(PathBuf::from(attr))
291    }
292
293    /// Parse the EPUB metadata section
294    ///
295    /// This function is responsible for parsing the `<metadata>` elements
296    /// in the OPF file to extract basic information about the publication.
297    /// It handles metadata elements from different namespaces:
298    /// - Elements in the Dublin Core namespace (`http://purl.org/dc/elements/1.1/`)
299    /// - Elements in the OPF namespace (`http://www.idpf.org/2007/opf`)
300    ///
301    /// ## Parameters
302    /// - `metadata_element`: A reference to the `<metadata>` element in the OPF file
303    fn parse_metadata(&mut self, metadata_element: &XmlElement) -> Result<(), EpubError> {
304        const DC_NAMESPACE: &str = "http://purl.org/dc/elements/1.1/";
305        const OPF_NAMESPACE: &str = "http://www.idpf.org/2007/opf";
306
307        let mut metadata = Vec::new();
308        let mut metadata_link = Vec::new();
309        let mut refinements = HashMap::<String, Vec<MetadataRefinement>>::new();
310
311        for element in metadata_element.children() {
312            match &element.namespace {
313                Some(namespace) if namespace == DC_NAMESPACE => {
314                    self.parse_dc_metadata(element, &mut metadata)?
315                }
316
317                Some(namespace) if namespace == OPF_NAMESPACE => self.parse_opf_metadata(
318                    element,
319                    &mut metadata,
320                    &mut metadata_link,
321                    &mut refinements,
322                )?,
323
324                _ => {}
325            };
326        }
327
328        for item in metadata.iter_mut() {
329            if let Some(id) = &item.id {
330                if let Some(refinements) = refinements.remove(id) {
331                    item.refined = refinements;
332                }
333            }
334        }
335
336        self.metadata = metadata;
337        self.metadata_link = metadata_link;
338        Ok(())
339    }
340
341    /// Parse the EPUB manifest section
342    ///
343    /// This function parses the `<manifest>` element in the OPF file, extracting
344    /// information about all resource files in the publication. Each resource contains
345    /// basic information such as id, file path, MIME type, as well as optional
346    /// attributes and fallback resource information.
347    ///
348    /// ## Parameters
349    /// - `manifest_element`: A reference to the `<manifest>` element in the OPF file
350    fn parse_manifest(&mut self, manifest_element: &XmlElement) -> Result<(), EpubError> {
351        let estimated_items = manifest_element.children().count();
352        #[cfg(feature = "no-indexmap")]
353        let mut resources = HashMap::with_capacity(estimated_items);
354        #[cfg(not(feature = "no-indexmap"))]
355        let mut resources = IndexMap::with_capacity(estimated_items);
356
357        for element in manifest_element.children() {
358            let id = element
359                .get_attr("id")
360                .ok_or_else(|| EpubError::MissingRequiredAttribute {
361                    tag: element.tag_name(),
362                    attribute: "id".to_string(),
363                })?
364                .to_string();
365            let path = element
366                .get_attr("href")
367                .ok_or_else(|| EpubError::MissingRequiredAttribute {
368                    tag: element.tag_name(),
369                    attribute: "href".to_string(),
370                })?
371                .to_string();
372            let mime = element
373                .get_attr("media-type")
374                .ok_or_else(|| EpubError::MissingRequiredAttribute {
375                    tag: element.tag_name(),
376                    attribute: "media-type".to_string(),
377                })?
378                .to_string();
379            let properties = element.get_attr("properties");
380            let fallback = element.get_attr("fallback");
381
382            resources.insert(
383                id.clone(),
384                ManifestItem {
385                    id,
386                    path: self.normalize_manifest_path(&path)?,
387                    mime,
388                    properties,
389                    fallback,
390                },
391            );
392        }
393
394        self.manifest = resources;
395        self.validate_fallback_chains();
396        Ok(())
397    }
398
399    /// Parse the EPUB spine section
400    ///
401    /// This function parses the `<spine>` elements in the OPF file to extract
402    /// the reading order information of the publication. The spine defines the
403    /// linear reading order of the publication's content documents, and each
404    /// spine item references resources in the manifest.
405    ///
406    /// ## Parameters
407    /// - `spine_element`: A reference to the `<spine>` element in the OPF file
408    fn parse_spine(&mut self, spine_element: &XmlElement) -> Result<(), EpubError> {
409        let mut spine = Vec::new();
410        for element in spine_element.children() {
411            let idref = element
412                .get_attr("idref")
413                .ok_or_else(|| EpubError::MissingRequiredAttribute {
414                    tag: element.tag_name(),
415                    attribute: "idref".to_string(),
416                })?
417                .to_string();
418            let id = element.get_attr("id");
419            let linear = element
420                .get_attr("linear")
421                .map(|linear| linear == "yes")
422                .unwrap_or(true);
423            let properties = element.get_attr("properties");
424
425            spine.push(SpineItem { idref, id, linear, properties });
426        }
427
428        self.spine = spine;
429        Ok(())
430    }
431
432    /// Parse the EPUB encryption file (META-INF/encryption.xml)
433    ///
434    /// This function is responsible for parsing the `encryption.xml` file
435    /// in the `META-INF` directory to extract information about encrypted
436    /// resources in the publication. According to the EPUB specification,
437    /// the encryption information describes which resources are encrypted
438    /// and the encryption methods used.
439    ///
440    /// TODO: 需要对使用非对称加密数据的加密项进行额外处理,以获取非对称加密密钥
441    fn parse_encryption(&mut self) -> Result<(), EpubError> {
442        if !self.has_encryption() {
443            return Ok(());
444        }
445
446        let mut archive = self.archive.lock()?;
447        let encryption_file =
448            get_file_in_zip_archive(&mut archive, "META-INF/encryption.xml")?.decode()?;
449
450        let root = XmlReader::parse(&encryption_file)?;
451
452        let mut encryption_data = Vec::new();
453        for data in root.children() {
454            if data.name != "EncryptedData" {
455                continue;
456            }
457
458            let method = data
459                .find_elements_by_name("EncryptionMethod")
460                .next()
461                .ok_or_else(|| EpubError::NonCanonicalFile {
462                    tag: "EncryptionMethod".to_string(),
463                })?;
464            let reference = data
465                .find_elements_by_name("CipherReference")
466                .next()
467                .ok_or_else(|| EpubError::NonCanonicalFile {
468                    tag: "CipherReference".to_string(),
469                })?;
470
471            encryption_data.push(EncryptionData {
472                method: method
473                    .get_attr("Algorithm")
474                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
475                        tag: "EncryptionMethod".to_string(),
476                        attribute: "Algorithm".to_string(),
477                    })?
478                    .to_string(),
479                data: reference
480                    .get_attr("URI")
481                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
482                        tag: "CipherReference".to_string(),
483                        attribute: "URI".to_string(),
484                    })?
485                    .to_string(),
486            });
487        }
488
489        if !encryption_data.is_empty() {
490            self.encryption = Some(encryption_data);
491        }
492
493        Ok(())
494    }
495
496    /// Parse the EPUB navigation information
497    ///
498    /// This function is responsible for parsing the navigation information of EPUB
499    /// publications. Different parsing strategies are used depending on the EPUB version:
500    /// - EPUB 2.0: Parses the NCX file to obtain directory information
501    /// - EPUB 3.0: Parses the Navigation Document (NAV) file to obtain directory information
502    fn parse_catalog(&mut self) -> Result<(), EpubError> {
503        const HEAD_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];
504
505        let mut archive = self.archive.lock()?;
506        match self.version {
507            EpubVersion::Version2_0 => {
508                let opf_file =
509                    get_file_in_zip_archive(&mut archive, self.package_path.to_str().unwrap())?
510                        .decode()?;
511                let opf_element = XmlReader::parse(&opf_file)?;
512
513                let toc_id = opf_element
514                    .find_children_by_name("spine")
515                    .next()
516                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?
517                    .get_attr("toc")
518                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
519                        tag: "spine".to_string(),
520                        attribute: "toc".to_string(),
521                    })?
522                    .to_owned();
523                let toc_path = self
524                    .manifest
525                    .get(&toc_id)
526                    .ok_or(EpubError::ResourceIdNotExist { id: toc_id })?
527                    .path
528                    .to_str()
529                    .unwrap();
530
531                let ncx_file = get_file_in_zip_archive(&mut archive, toc_path)?.decode()?;
532                let ncx = XmlReader::parse(&ncx_file)?;
533
534                match ncx.find_elements_by_name("docTitle").next() {
535                    Some(element) => self.catalog_title = element.text(),
536                    None => log::warn!(
537                        "Expecting to get docTitle information from the ncx file, but it's missing."
538                    ),
539                };
540
541                let nav_map = ncx
542                    .find_elements_by_name("navMap")
543                    .next()
544                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "navMap".to_string() })?;
545
546                self.catalog = self.parse_nav_points(nav_map)?;
547
548                Ok(())
549            }
550
551            EpubVersion::Version3_0 => {
552                let nav_path = self
553                    .manifest
554                    .values()
555                    .find(|item| {
556                        if let Some(property) = &item.properties {
557                            return property.contains("nav");
558                        }
559                        false
560                    })
561                    .map(|item| item.path.clone())
562                    .ok_or_else(|| EpubError::NonCanonicalEpub {
563                        expected_file: "Navigation Document".to_string(),
564                    })?;
565
566                let nav_file =
567                    get_file_in_zip_archive(&mut archive, nav_path.to_str().unwrap())?.decode()?;
568
569                let nav_element = XmlReader::parse(&nav_file)?;
570                let nav = nav_element
571                    .find_elements_by_name("nav")
572                    .find(|&element| element.get_attr("epub:type") == Some(String::from("toc")))
573                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "nav".to_string() })?;
574                let nav_title = nav.find_children_by_names(&HEAD_TAGS).next();
575                let nav_list = nav
576                    .find_children_by_name("ol")
577                    .next()
578                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "ol".to_string() })?;
579
580                self.catalog = self.parse_catalog_list(nav_list)?;
581                if let Some(nav_title) = nav_title {
582                    self.catalog_title = nav_title.text();
583                };
584                Ok(())
585            }
586        }
587    }
588
589    /// Check if the EPUB file contains `encryption.xml`
590    ///
591    /// This function determines whether a publication contains encrypted resources
592    /// by checking if a `META-INF/encryption.xml` file exists in the EPUB package.
593    /// According to the EPUB specification, when resources in a publication are
594    /// encrypted, the corresponding encryption information must be declared in
595    /// the `META-INF/encryption.xml` file.
596    ///
597    /// ## Return
598    /// - `true` if the publication contains encrypted resources
599    /// - `false` if the publication does not contain encrypted resources
600    ///
601    /// ## Notes
602    /// - This function only checks the existence of the encrypted file;
603    ///   it does not verify the validity of the encrypted information.
604    #[inline]
605    pub fn has_encryption(&self) -> bool {
606        self.has_encryption
607    }
608
609    /// Retrieves a list of metadata items
610    ///
611    /// This function retrieves all matching metadata items from the EPUB metadata
612    /// based on the specified attribute name (key). Metadata items may come from
613    /// the DC (Dublin Core) namespace or the OPF namespace and contain basic
614    /// information about the publication, such as title, author, identifier, etc.
615    ///
616    /// ## Parameters
617    /// - `key`: The name of the metadata attribute to retrieve
618    ///
619    /// ## Return
620    /// - `Some(Vec<MetadataItem>)`: A vector containing all matching metadata items
621    /// - `None`: If no matching metadata items are found
622    pub fn get_metadata(&self, key: &str) -> Option<Vec<MetadataItem>> {
623        let metadatas = self
624            .metadata
625            .iter()
626            .filter(|item| item.property == key)
627            .cloned()
628            .collect::<Vec<MetadataItem>>();
629
630        (!metadatas.is_empty()).then_some(metadatas)
631    }
632
633    /// Retrieves a list of values for specific metadata items
634    ///
635    /// This function retrieves the values ​​of all matching metadata items from
636    /// the EPUB metadata based on the given property name (key).
637    ///
638    /// ## Parameters
639    /// - `key`: The name of the metadata attribute to retrieve
640    ///
641    /// ## Return
642    /// - `Some(Vec<String>)`: A vector containing all matching metadata item values
643    /// - `None`: If no matching metadata items are found
644    pub fn get_metadata_value(&self, key: &str) -> Option<Vec<String>> {
645        let values = self
646            .metadata
647            .iter()
648            .filter(|item| item.property == key)
649            .map(|item| item.value.clone())
650            .collect::<Vec<String>>();
651
652        (!values.is_empty()).then_some(values)
653    }
654
655    /// Retrieves the title of the publication
656    ///
657    /// This function retrieves all title information from the EPUB metadata.
658    /// According to the EPUB specification, a publication can have multiple titles,
659    /// which are returned in the order they appear in the metadata.
660    ///
661    /// ## Return
662    /// - `Result<Vec<String>, EpubError>`: A vector containing all title information
663    /// - `EpubError`: If and only if the OPF file does not contain `<dc:title>`
664    ///
665    /// ## Notes
666    /// - The EPUB specification requires each publication to have at least one title.
667    #[inline]
668    pub fn get_title(&self) -> Vec<String> {
669        self.get_metadata_value("title")
670            .expect("missing required 'title' metadata which is required by the EPUB specification")
671    }
672
673    /// Retrieves the language used in the publication
674    ///
675    /// This function retrieves the language information of a publication from the EPUB
676    /// metadata. According to the EPUB specification, language information identifies
677    /// the primary language of the publication and can have multiple language identifiers.
678    ///
679    /// ## Return
680    /// - `Ok(Vec<String>)`: A vector containing all language identifiers
681    /// - `Err(EpubError)`: If and only if the OPF file does not contain `<dc:language>`
682    ///
683    /// ## Notes
684    /// - The EPUB specification requires that each publication specify at least one primary language.
685    /// - Language identifiers should conform to RFC 3066 or later standards.
686    #[inline]
687    pub fn get_language(&self) -> Vec<String> {
688        self.get_metadata_value("language").expect(
689            "missing required 'language' metadata which is required by the EPUB specification",
690        )
691    }
692
693    /// Retrieves the identifier of a publication
694    ///
695    /// This function retrieves the identifier information of a publication from
696    /// the EPUB metadata. According to the EPUB specification, each publication
697    /// must have a identifier, typically an ISBN, UUID, or other unique identifier.
698    ///
699    /// ## Return
700    /// - `Ok(Vec<String>)`: A vector containing all identifier information
701    /// - `Err(EpubError)`: If and only if the OPF file does not contain `<dc:identifier>`
702    ///
703    /// ## Notes
704    /// - The EPUB specification requires each publication to have at least one identifier.
705    /// - In the OPF file, the `unique-identifier` attribute of the `<package>` element
706    ///   should point to a `<dc:identifier>` element used to uniquely identify the publication.
707    ///   This means that `unique-identifier` is not exactly equal to `<dc:identifier>`.
708    #[inline]
709    pub fn get_identifier(&self) -> Vec<String> {
710        self.get_metadata_value("identifier").expect(
711            "missing required 'identifier' metadata which is required by the EPUB specification",
712        )
713    }
714
715    /// Retrieves a unified metadata sheet from the EPUB publication
716    ///
717    /// This function consolidates all metadata from the EPUB into a single `MetadataSheet`
718    /// structure, providing a simplified interface for metadata access. It handles both
719    /// EPUB 2 and EPUB 3 metadata formats, including refinements from EPUB 3.
720    ///
721    /// ## Return
722    /// - `MetadataSheet`: A populated metadata sheet containing all publication metadata
723    ///
724    /// ## Notes
725    /// - Multi-value metadata (title, creator, etc.) are stored in Vec fields in order
726    /// - Date metadata extracts event type from refinements (e.g., "publication", "modification")
727    /// - Identifier metadata uses item IDs as keys in the HashMap
728    pub fn get_metadata_sheet(&self) -> &MetadataSheet {
729        self.metadata_sheet.get_or_init(|| {
730            let mut sheet = MetadataSheet::new();
731            for item in &self.metadata {
732                let value = item.value.clone();
733
734                match item.property.as_str() {
735                    "title" => {
736                        sheet.title.push(value);
737                    }
738                    "creator" => {
739                        sheet.creator.push(value);
740                    }
741                    "contributor" => {
742                        sheet.contributor.push(value);
743                    }
744                    "subject" => {
745                        sheet.subject.push(value);
746                    }
747                    "language" => {
748                        sheet.language.push(value);
749                    }
750                    "relation" => {
751                        sheet.relation.push(value);
752                    }
753                    "date" => {
754                        let event = item
755                            .refined
756                            .iter()
757                            .filter_map(|refine| {
758                                if refine.property.eq("event") {
759                                    Some(refine.value.clone())
760                                } else {
761                                    None
762                                }
763                            })
764                            .next()
765                            .unwrap_or_default();
766                        sheet.date.insert(value, event);
767                    }
768                    "identifier" => {
769                        let id = item.id.clone().unwrap_or_default();
770                        sheet.identifier.insert(id, value);
771                    }
772                    "description" => {
773                        sheet.description = value;
774                    }
775                    "format" => {
776                        sheet.format = value;
777                    }
778                    "publisher" => {
779                        sheet.publisher = value;
780                    }
781                    "rights" => {
782                        sheet.rights = value;
783                    }
784                    "source" => {
785                        sheet.source = value;
786                    }
787                    "ccoverage" => {
788                        sheet.coverage = value;
789                    }
790                    "type" => {
791                        sheet.epub_type = value;
792                    }
793                    _ => {}
794                };
795            }
796            sheet
797        })
798    }
799
800    /// Retrieve resource data by resource ID
801    ///
802    /// This function will find the resource with the specified ID in the manifest.
803    /// If the resource is encrypted, it will be automatically decrypted.
804    ///
805    /// ## Parameters
806    /// - `id`: The ID of the resource to retrieve
807    ///
808    /// ## Return
809    /// - `Ok((Vec<u8>, String))`: Successfully retrieved and decrypted resource data and
810    ///   the MIME type
811    /// - `Err(EpubError)`: Errors that occurred during the retrieval process
812    ///
813    /// ## Notes
814    /// - This function will automatically decrypt the resource if it is encrypted.
815    /// - For unsupported encryption methods, the corresponding error will be returned.
816    pub fn get_manifest_item(&self, id: &str) -> Result<(Vec<u8>, String), EpubError> {
817        let resource_item = self
818            .manifest
819            .get(id)
820            .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
821
822        self.get_resource(resource_item)
823    }
824
825    /// Retrieves resource item data by resource path
826    ///
827    /// This function retrieves resources from the manifest based on the input path.
828    /// The input path must be a relative path to the root directory of the EPUB container;
829    /// using an absolute path or a relative path to another location will result in an error.
830    ///
831    /// ## Parameters
832    /// - `path`: The path of the resource to retrieve
833    ///
834    /// ## Return
835    /// - `Ok((Vec<u8>, String))`: Successfully retrieved and decrypted resource data and
836    ///   the MIME type
837    /// - `Err(EpubError)`: Errors that occurred during the retrieval process
838    ///
839    /// ## Notes
840    /// - This function will automatically decrypt the resource if it is encrypted.
841    /// - For unsupported encryption methods, the corresponding error will be returned.
842    /// - Relative paths other than the root directory of the Epub container are not supported.
843    pub fn get_manifest_item_by_path(&self, path: &str) -> Result<(Vec<u8>, String), EpubError> {
844        let manifest = self
845            .manifest
846            .iter()
847            .find(|(_, item)| item.path.to_str().unwrap() == path)
848            .map(|(_, manifest)| manifest)
849            .ok_or_else(|| EpubError::ResourceNotFound { resource: path.to_string() })?;
850
851        self.get_resource(manifest)
852    }
853
854    /// Retrieves supported resource items by resource ID, with fallback mechanism supported
855    ///
856    /// This function attempts to retrieve the resource item with the specified ID and
857    /// checks if its MIME type is in the list of supported formats. If the current resource
858    /// format is not supported, it searches for a supported resource format along the
859    /// fallback chain according to the fallback mechanism defined in the EPUB specification.
860    ///
861    /// ## Parameters
862    /// - `id`: The ID of the resource to retrieve
863    /// - `supported_format`: A vector of supported MIME types
864    ///
865    /// ## Return
866    /// - `Ok((Vec<u8>, String))`: Successfully retrieved and decrypted resource data and
867    ///   the MIME type
868    /// - `Err(EpubError)`: Errors that occurred during the retrieval process
869    pub fn get_manifest_item_with_fallback(
870        &self,
871        id: &str,
872        supported_format: &[&str],
873    ) -> Result<(Vec<u8>, String), EpubError> {
874        let mut current_id = id;
875        let mut fallback_chain = Vec::<&str>::new();
876        'fallback: loop {
877            let manifest_item = self
878                .manifest
879                .get(current_id)
880                .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
881
882            if supported_format.contains(&manifest_item.mime.as_str()) {
883                return self.get_resource(manifest_item);
884            }
885
886            let fallback_id = match &manifest_item.fallback {
887                // The loop ends when no fallback resource exists
888                None => break 'fallback,
889
890                // End the loop when the loop continues to fallback if a fallback resource exists
891                Some(id) if fallback_chain.contains(&id.as_str()) => break 'fallback,
892
893                Some(id) => {
894                    fallback_chain.push(id.as_str());
895
896                    // Since only warnings are issued for fallback resource checks
897                    // during initialization, the issue of fallback resources possibly
898                    // not existing needs to be handled here.
899                    id.as_str()
900                }
901            };
902
903            current_id = fallback_id;
904        }
905
906        Err(EpubError::NoSupportedFileFormat)
907    }
908
909    /// Retrieves the cover of the EPUB document
910    ///
911    /// This function searches for the cover of the EPUB document by examining manifest
912    /// items in the manifest. It looks for manifest items whose ID or attribute contains
913    /// "cover" (case-insensitive) and attempts to retrieve the content of the first match.
914    ///
915    /// ## Return
916    /// - `Some((Vec<u8>, String))`: Successfully retrieved and decrypted cover data and
917    ///   the MIME type
918    /// - `None`: No cover resource was found
919    ///
920    /// ## Notes
921    /// - This function only returns the first successfully retrieved cover resource,
922    ///   even if multiple matches exist
923    /// - The retrieved cover may not be an image resource; users need to pay attention
924    ///   to the resource's MIME type.
925    pub fn get_cover(&self) -> Option<(Vec<u8>, String)> {
926        self.manifest
927            .values()
928            .filter(|manifest| {
929                manifest.id.to_ascii_lowercase().contains("cover")
930                    || manifest
931                        .properties
932                        .as_ref()
933                        .map(|properties| properties.to_ascii_lowercase().contains("cover"))
934                        .unwrap_or(false)
935            })
936            .find_map(|manifest| {
937                self.get_resource(manifest)
938                    .map_err(|err| log::warn!("{err}"))
939                    .ok()
940            })
941    }
942
943    /// Retrieves resource data by manifest item
944    fn get_resource(&self, resource_item: &ManifestItem) -> Result<(Vec<u8>, String), EpubError> {
945        let path = resource_item
946            .path
947            .to_str()
948            .expect("manifest item path should be valid UTF-8");
949
950        let mut archive = self.archive.lock()?;
951        let mut data = match archive.by_name(path) {
952            Ok(mut file) => {
953                let mut entry = Vec::<u8>::new();
954                file.read_to_end(&mut entry)?;
955                Ok(entry)
956            }
957            Err(ZipError::FileNotFound) => {
958                Err(EpubError::ResourceNotFound { resource: path.to_string() })
959            }
960            Err(err) => Err(EpubError::from(err)),
961        }?;
962
963        if let Some(method) = self.is_encryption_file(path) {
964            data = self.auto_dencrypt(&method, &mut data)?;
965        }
966
967        Ok((data, resource_item.mime.clone()))
968    }
969
970    /// Navigate to a specified chapter using the spine index
971    ///
972    /// This function retrieves the content data of the corresponding chapter based
973    /// on the index position in the EPUB spine. The spine defines the linear reading
974    /// order of the publication's content documents, and each spine item references
975    /// resources in the manifest.
976    ///
977    /// ## Parameters
978    /// - `index`: The index position in the spine, starting from 0
979    ///
980    /// ## Return
981    /// - `Some((Vec<u8>, String))`: Successfully retrieved chapter content data and the MIME type
982    /// - `None`: Index out of range or data retrieval error
983    ///
984    /// ## Notes
985    /// - The index must be less than the total number of spine projects.
986    /// - If the resource is encrypted, it will be automatically decrypted before returning.
987    /// - It does not check whether the Spine project follows a linear reading order.
988    pub fn navigate_by_spine_index(&self, index: usize) -> Option<(Vec<u8>, String)> {
989        if index >= self.spine.len() {
990            return None;
991        }
992
993        let manifest_id = self.spine[index].idref.as_ref();
994        self.current_spine_index.store(index, Ordering::SeqCst);
995        self.get_manifest_item(manifest_id)
996            .map_err(|err| log::warn!("{err}"))
997            .ok()
998    }
999
1000    /// Navigate to the previous linear reading chapter
1001    ///
1002    /// This function searches backwards in the EPUB spine for the previous linear
1003    /// reading chapter and returns the content data of that chapter. It only navigates
1004    /// to chapters marked as linear reading.
1005    ///
1006    /// ## Return
1007    /// - `Some((Vec<u8>, String))`: Successfully retrieved previous chapter content data and
1008    ///   the MIME type
1009    /// - `None`: Already in the first chapter, the current chapter is not linear,
1010    ///   or data retrieval failed
1011    pub fn spine_prev(&self) -> Option<(Vec<u8>, String)> {
1012        let current_index = self.current_spine_index.load(Ordering::SeqCst);
1013        if current_index == 0 || !self.spine[current_index].linear {
1014            return None;
1015        }
1016
1017        let prev_index = (0..current_index)
1018            .rev()
1019            .find(|&index| self.spine[index].linear)?;
1020
1021        self.current_spine_index.store(prev_index, Ordering::SeqCst);
1022        let manifest_id = self.spine[prev_index].idref.as_ref();
1023        self.get_manifest_item(manifest_id)
1024            .map_err(|err| log::warn!("{err}"))
1025            .ok()
1026    }
1027
1028    /// Navigate to the next linear reading chapter
1029    ///
1030    /// This function searches forwards in the EPUB spine for the next linear reading
1031    /// chapter and returns the content data of that chapter. It only navigates to
1032    /// chapters marked as linear reading.
1033    ///
1034    /// ## Return
1035    /// - `Some((Vec<u8>, String))`: Successfully retrieved next chapter content data and
1036    ///   the MIME type
1037    /// - `None`: Already in the last chapter, the current chapter is not linear,
1038    ///   or data retrieval failed
1039    pub fn spine_next(&self) -> Option<(Vec<u8>, String)> {
1040        let current_index = self.current_spine_index.load(Ordering::SeqCst);
1041        if current_index >= self.spine.len() - 1 || !self.spine[current_index].linear {
1042            return None;
1043        }
1044
1045        let next_index =
1046            (current_index + 1..self.spine.len()).find(|&index| self.spine[index].linear)?;
1047
1048        self.current_spine_index.store(next_index, Ordering::SeqCst);
1049        let manifest_id = self.spine[next_index].idref.as_ref();
1050        self.get_manifest_item(manifest_id)
1051            .map_err(|err| log::warn!("{err}"))
1052            .ok()
1053    }
1054
1055    /// Retrieves the content data of the current chapter
1056    ///
1057    /// This function returns the content data of the chapter at the current
1058    /// index position in the EPUB spine.
1059    ///
1060    /// ## Return
1061    /// - `Some((Vec<u8>, String))`: Successfully retrieved current chapter content data and
1062    ///   the MIME type
1063    /// - `None`: Data retrieval failed
1064    pub fn spine_current(&self) -> Option<(Vec<u8>, String)> {
1065        let manifest_id = self.spine[self.current_spine_index.load(Ordering::SeqCst)]
1066            .idref
1067            .as_ref();
1068        self.get_manifest_item(manifest_id)
1069            .map_err(|err| log::warn!("{err}"))
1070            .ok()
1071    }
1072
1073    /// Determine the EPUB version from the OPF file
1074    ///
1075    /// This function is used to detect the version of an epub file from an OPF file.
1076    /// When the version attribute in the package is abnormal, version information will
1077    /// be identified through some version characteristics of the epub file. An error is
1078    /// returned when neither direct nor indirect methods can identify the version.
1079    ///
1080    /// ## Parameters
1081    /// - `opf_element`: A reference to the OPF file element
1082    fn determine_epub_version(opf_element: &XmlElement) -> Result<EpubVersion, EpubError> {
1083        // Check the explicit version attribute
1084        if let Some(version) = opf_element.get_attr("version") {
1085            match version.as_str() {
1086                "2.0" => return Ok(EpubVersion::Version2_0),
1087                "3.0" => return Ok(EpubVersion::Version3_0),
1088                _ => {}
1089            }
1090        }
1091
1092        let spine_element = opf_element
1093            .find_elements_by_name("spine")
1094            .next()
1095            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?;
1096
1097        // Look for EPUB 2.x specific features
1098        if spine_element.get_attr("toc").is_some() {
1099            return Ok(EpubVersion::Version2_0);
1100        }
1101
1102        let manifest_element = opf_element
1103            .find_elements_by_name("manifest")
1104            .next()
1105            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "manifest".to_string() })?;
1106
1107        // Look for EPUB 3.x specific features
1108        manifest_element
1109            .children()
1110            .find_map(|element| {
1111                if let Some(id) = element.get_attr("id") {
1112                    if id.eq("nav") {
1113                        return Some(EpubVersion::Version3_0);
1114                    }
1115                }
1116
1117                None
1118            })
1119            .ok_or(EpubError::UnrecognizedEpubVersion)
1120    }
1121
1122    /// Parse metadata elements under the Dublin Core namespace
1123    ///
1124    /// This function handles the `<metadata>` Dublin Core element in the OPF file (namespace
1125    /// is "http://purl.org/dc/elements/1.1/"). These elements usually contain the basic
1126    /// information of the publication, such as title, author, publication date, etc.
1127    ///
1128    /// ## Notes
1129    /// - In EPUB 3.0, granular information is handled by separate '<meta>' elements and 'refines' attributes
1130    /// - All text content is normalized by whitespace
1131    #[inline]
1132    fn parse_dc_metadata(
1133        &self,
1134        element: &XmlElement,
1135        metadata: &mut Vec<MetadataItem>,
1136        // refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1137    ) -> Result<(), EpubError> {
1138        let id = element.get_attr("id");
1139        let lang = element.get_attr("lang");
1140        let property = element.name.clone();
1141        let value = element.text().normalize_whitespace();
1142
1143        let refined = match self.version {
1144            // In EPUB 2.0, supplementary metadata (refinements) are represented
1145            // through other attribute data pairs of the tag.
1146            EpubVersion::Version2_0 => element
1147                .attributes
1148                .iter()
1149                .map(|(name, value)| {
1150                    let property = name.to_string();
1151                    let value = value.to_string().normalize_whitespace();
1152
1153                    MetadataRefinement {
1154                        refines: id.clone().unwrap(),
1155                        property,
1156                        value,
1157                        lang: None,
1158                        scheme: None,
1159                    }
1160                })
1161                .collect(),
1162            EpubVersion::Version3_0 => vec![],
1163        };
1164
1165        metadata.push(MetadataItem { id, property, value, lang, refined });
1166
1167        Ok(())
1168    }
1169
1170    /// Parse metadata elements under the OPF namespace
1171    ///
1172    /// This function handles the `<metadata>` OPF element in the OPF file (namespace
1173    /// is "http://www.idpf.org/2007/opf"). These elements include '<meta>' and '<link>',
1174    /// which are used to provide extended metadata and links to external resources for EPUB publications.
1175    ///
1176    /// ## Notes
1177    /// - The function is only responsible for distribution processing, and the
1178    ///   specific parsing logic is implemented in the dedicated function
1179    /// - All parsing results are added directly to the incoming collection and no new collection is returned
1180    #[inline]
1181    fn parse_opf_metadata(
1182        &self,
1183        element: &XmlElement,
1184        metadata: &mut Vec<MetadataItem>,
1185        metadata_link: &mut Vec<MetadataLinkItem>,
1186        refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1187    ) -> Result<(), EpubError> {
1188        match element.name.as_str() {
1189            "meta" => self.parse_meta_element(element, metadata, refinements),
1190            "link" => self.parse_link_element(element, metadata_link),
1191            _ => Ok(()),
1192        }
1193    }
1194
1195    #[inline]
1196    fn parse_meta_element(
1197        &self,
1198        element: &XmlElement,
1199        metadata: &mut Vec<MetadataItem>,
1200        refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1201    ) -> Result<(), EpubError> {
1202        match self.version {
1203            EpubVersion::Version2_0 => {
1204                let property = element
1205                    .get_attr("name")
1206                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: element.tag_name() })?;
1207                let value = element
1208                    .get_attr("content")
1209                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
1210                        tag: element.tag_name(),
1211                        attribute: "content".to_string(),
1212                    })?
1213                    .normalize_whitespace();
1214
1215                metadata.push(MetadataItem {
1216                    id: None,
1217                    property,
1218                    value,
1219                    lang: None,
1220                    refined: vec![],
1221                });
1222            }
1223
1224            EpubVersion::Version3_0 => {
1225                let property = element.get_attr("property").ok_or_else(|| {
1226                    EpubError::MissingRequiredAttribute {
1227                        tag: element.tag_name(),
1228                        attribute: "property".to_string(),
1229                    }
1230                })?;
1231                let value = element.text().normalize_whitespace();
1232                let lang = element.get_attr("lang");
1233
1234                if let Some(refines) = element.get_attr("refines") {
1235                    let id = refines.strip_prefix("#").unwrap_or(&refines).to_string();
1236                    let scheme = element.get_attr("scheme");
1237                    let refinement = MetadataRefinement {
1238                        refines: id.clone(),
1239                        property,
1240                        value,
1241                        lang,
1242                        scheme,
1243                    };
1244
1245                    if let Some(refinements) = refinements.get_mut(&id) {
1246                        refinements.push(refinement);
1247                    } else {
1248                        refinements.insert(id, vec![refinement]);
1249                    }
1250                } else {
1251                    let id = element.get_attr("id");
1252                    let item = MetadataItem {
1253                        id,
1254                        property,
1255                        value,
1256                        lang,
1257                        refined: vec![],
1258                    };
1259
1260                    metadata.push(item);
1261                };
1262            }
1263        }
1264        Ok(())
1265    }
1266
1267    #[inline]
1268    fn parse_link_element(
1269        &self,
1270        element: &XmlElement,
1271        metadata_link: &mut Vec<MetadataLinkItem>,
1272    ) -> Result<(), EpubError> {
1273        let href = element
1274            .get_attr("href")
1275            .ok_or_else(|| EpubError::MissingRequiredAttribute {
1276                tag: element.tag_name(),
1277                attribute: "href".to_string(),
1278            })?;
1279        let rel = element
1280            .get_attr("rel")
1281            .ok_or_else(|| EpubError::MissingRequiredAttribute {
1282                tag: element.tag_name(),
1283                attribute: "rel".to_string(),
1284            })?;
1285        let hreflang = element.get_attr("hreflang");
1286        let id = element.get_attr("id");
1287        let mime = element.get_attr("media-type");
1288        let properties = element.get_attr("properties");
1289
1290        metadata_link.push(MetadataLinkItem {
1291            href,
1292            rel,
1293            hreflang,
1294            id,
1295            mime,
1296            properties,
1297            refines: None,
1298        });
1299        Ok(())
1300    }
1301
1302    /// Recursively parse NCX navigation points from navMap or nested navPoint elements
1303    ///
1304    /// This function parses the hierarchical navigation structure defined in NCX files
1305    /// for EPUB 2.x documents. It handles nested navPoint elements to build a complete
1306    /// tree representation of the publication's table of contents.
1307    fn parse_nav_points(&self, parent_element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1308        let mut nav_points = Vec::new();
1309        for nav_point in parent_element.find_children_by_name("navPoint") {
1310            let label = match nav_point.find_children_by_name("navLabel").next() {
1311                Some(element) => element.text(),
1312                None => String::new(),
1313            };
1314
1315            let content = nav_point
1316                .find_children_by_name("content")
1317                .next()
1318                .map(|element| PathBuf::from(element.text()));
1319
1320            let play_order = nav_point
1321                .get_attr("playOrder")
1322                .and_then(|order| order.parse::<usize>().ok());
1323
1324            let children = self.parse_nav_points(nav_point)?;
1325
1326            nav_points.push(NavPoint { label, content, play_order, children });
1327        }
1328
1329        nav_points.sort();
1330        Ok(nav_points)
1331    }
1332
1333    /// Recursively parses directory list structures
1334    ///
1335    /// This function recursively parses HTML navigation list structures,
1336    /// converting `<ol>` and `<li>` elements into NavPoint structures.
1337    /// Multi-level nested directory structures are supported.
1338    fn parse_catalog_list(&self, element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1339        let mut catalog = Vec::new();
1340        for item in element.children() {
1341            if item.tag_name() != "li" {
1342                return Err(EpubError::NonCanonicalFile { tag: "li".to_string() });
1343            }
1344
1345            let title_element = item
1346                .find_children_by_names(&["span", "a"])
1347                .next()
1348                .ok_or_else(|| EpubError::NonCanonicalFile { tag: "span/a".to_string() })?;
1349            let content_href = title_element.get_attr("href").map(PathBuf::from);
1350            let sub_list = if let Some(list) = item.find_children_by_name("ol").next() {
1351                self.parse_catalog_list(list)?
1352            } else {
1353                vec![]
1354            };
1355
1356            catalog.push(NavPoint {
1357                label: title_element.text(),
1358                content: content_href,
1359                children: sub_list,
1360                play_order: None,
1361            });
1362        }
1363
1364        Ok(catalog)
1365    }
1366
1367    /// Converts relative paths in the manifest to normalized paths
1368    /// relative to the EPUB root directory
1369    ///
1370    /// This function processes the href attribute of resources in the EPUB
1371    /// manifest and converts it to a normalized path representation.
1372    /// It handles three types of paths:
1373    /// - Relative paths starting with `../` (checks if they exceed the EPUB package scope)
1374    /// - Absolute paths starting with `/` (relative to the EPUB root directory)
1375    /// - Other relative paths (relative to the directory containing the OPF file)
1376    ///
1377    /// ## Parameters
1378    /// - `path`: The href attribute value of the resource in the manifest
1379    ///
1380    /// ## Return
1381    /// - `Ok(PathBuf)`: The parsed normalized path
1382    /// - `Err(EpubError)`: Relative link leakage
1383    #[inline]
1384    fn normalize_manifest_path(&self, path: &str) -> Result<PathBuf, EpubError> {
1385        let path = if path.starts_with("../") {
1386            let mut current_dir = self.epub_path.join(&self.package_path);
1387            current_dir.pop();
1388
1389            check_realtive_link_leakage(self.epub_path.clone(), current_dir, path)
1390                .map(PathBuf::from)
1391                .ok_or_else(|| EpubError::RelativeLinkLeakage { path: path.to_string() })?
1392        } else if let Some(stripped) = path.strip_prefix("/") {
1393            PathBuf::from(stripped.to_string())
1394        } else {
1395            self.base_path.join(path)
1396        };
1397
1398        #[cfg(windows)]
1399        let path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
1400
1401        Ok(path)
1402    }
1403
1404    /// Verify the fallback chain of all manifest items
1405    ///
1406    /// This function iterates through all manifest items with the fallback
1407    /// attribute and verifies the validity of their fallback chains, including checking:
1408    /// - Whether circular references exist
1409    /// - Whether the fallback resource exists in the manifest
1410    ///
1411    /// ## Notes
1412    /// If an invalid fallback chain is found, a warning log will be logged
1413    /// but the processing flow will not be interrupted.
1414    // TODO: consider using BFS to validate fallback chains, to provide efficient
1415    fn validate_fallback_chains(&self) {
1416        for (id, item) in &self.manifest {
1417            if item.fallback.is_none() {
1418                continue;
1419            }
1420
1421            let mut fallback_chain = Vec::new();
1422            if let Err(msg) = self.validate_fallback_chain(id, &mut fallback_chain) {
1423                log::warn!("Invalid fallback chain for item {}: {}", id, msg);
1424            }
1425        }
1426    }
1427
1428    /// Recursively verify the validity of a single fallback chain
1429    ///
1430    /// This function recursively traces the fallback chain to check for the following issues:
1431    /// - Circular reference
1432    /// - The referenced fallback resource does not exist
1433    ///
1434    /// ## Parameters
1435    /// - `manifest_id`: The id of the manifest item currently being verified
1436    /// - `fallback_chain`: The visited fallback chain paths used to detect circular references
1437    ///
1438    /// ## Return
1439    /// - `Ok(())`: The fallback chain is valid
1440    /// - `Err(String)`: A string containing error information
1441    fn validate_fallback_chain(
1442        &self,
1443        manifest_id: &str,
1444        fallback_chain: &mut Vec<String>,
1445    ) -> Result<(), String> {
1446        if fallback_chain.contains(&manifest_id.to_string()) {
1447            fallback_chain.push(manifest_id.to_string());
1448
1449            return Err(format!(
1450                "Circular reference detected in fallback chain for {}",
1451                fallback_chain.join("->")
1452            ));
1453        }
1454
1455        // Get the current item; its existence can be ensured based on the calling context.
1456        let item = self.manifest.get(manifest_id).unwrap();
1457
1458        if let Some(fallback_id) = &item.fallback {
1459            if !self.manifest.contains_key(fallback_id) {
1460                return Err(format!(
1461                    "Fallback resource {} does not exist in manifest",
1462                    fallback_id
1463                ));
1464            }
1465
1466            fallback_chain.push(manifest_id.to_string());
1467            self.validate_fallback_chain(fallback_id, fallback_chain)
1468        } else {
1469            // The end of the fallback chain
1470            Ok(())
1471        }
1472    }
1473
1474    /// Checks if a resource at the specified path is an encrypted file
1475    ///
1476    /// This function queries whether a specific resource path is marked as an encrypted
1477    /// file in the EPUB encryption information. It checks the encrypted data stored in
1478    /// `self.encryption`, looking for an entry that matches the given path.
1479    ///
1480    /// ## Parameters
1481    /// - `path`: The path of the resource to check
1482    ///
1483    /// ## Return
1484    /// - `Some(String)`: The encryption method used for the resource
1485    /// - `None`: The resource is not encrypted
1486    fn is_encryption_file(&self, path: &str) -> Option<String> {
1487        self.encryption.as_ref().and_then(|encryptions| {
1488            encryptions
1489                .iter()
1490                .find(|encryption| encryption.data == path)
1491                .map(|encryption| encryption.method.clone())
1492        })
1493    }
1494
1495    /// Automatically decrypts encrypted resource data
1496    ///
1497    /// Automatically decrypts data based on the provided encryption method.
1498    /// This function supports various encryption methods defined by the EPUB
1499    /// specification, including font obfuscation and the XML encryption standard.
1500    ///
1501    /// ## Parameters
1502    /// - `method`: The encryption method used for the resource
1503    /// - `data`: The encrypted resource data
1504    ///
1505    /// ## Return
1506    /// - `Ok(Vec<u8>)`: The decrypted resource data
1507    /// - `Err(EpubError)`: Unsupported encryption method
1508    ///
1509    /// ## Supported Encryption Methods
1510    /// - IDPF font obfuscation: `http://www.idpf.org/2008/embedding`
1511    /// - Adobe font obfuscation: `http://ns.adobe.com/pdf/enc#RC`
1512    #[inline]
1513    fn auto_dencrypt(&self, method: &str, data: &mut [u8]) -> Result<Vec<u8>, EpubError> {
1514        match method {
1515            "http://www.idpf.org/2008/embedding" => {
1516                Ok(idpf_font_dencryption(data, &self.unique_identifier))
1517            }
1518            "http://ns.adobe.com/pdf/enc#RC" => {
1519                Ok(adobe_font_dencryption(data, &self.unique_identifier))
1520            }
1521            _ => Err(EpubError::UnsupportedEncryptedMethod { method: method.to_string() }),
1522        }
1523    }
1524}
1525
1526impl EpubDoc<BufReader<File>> {
1527    /// Creates a new EPUB document instance
1528    ///
1529    /// This function is a convenience constructor for `EpubDoc`,
1530    /// used to create an EPUB parser instance directly from a file path.
1531    ///
1532    /// ## Parameters
1533    /// - `path`: The path to the EPUB file
1534    ///
1535    /// ## Return
1536    /// - `Ok(EpubDoc)`: The created EPUB document instance
1537    /// - `Err(EpubError)`: An error occurred during initialization
1538    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
1539        let file = File::open(&path).map_err(EpubError::from)?;
1540        let path = fs::canonicalize(path)?;
1541
1542        Self::from_reader(BufReader::new(file), path)
1543    }
1544
1545    /// Validates whether a file is a valid EPUB document
1546    ///
1547    /// This function attempts to open and parse the given file as an EPUB document.
1548    /// It performs basic validation to determine if the file conforms to the EPUB specification.
1549    ///
1550    /// ## Parameters
1551    /// - `path`: The path to the file to validate
1552    ///
1553    /// ## Returns
1554    /// - `Ok(true)`: The file is a valid EPUB document
1555    /// - `Ok(false)`: The file exists but is not a valid EPUB (e.g., missing required files,
1556    ///   invalid XML structure, unrecognized version)
1557    /// - `Err(EpubError)`: A critical error occurred (e.g., IO error, ZIP archive error,
1558    ///   encoding error, mutex poison)
1559    pub fn is_valid_epub<P: AsRef<Path>>(path: P) -> Result<bool, EpubError> {
1560        let result = EpubDoc::new(path);
1561
1562        match result {
1563            Ok(_) => Ok(true),
1564            Err(err) if Self::is_outside_error(&err) => Err(err),
1565            Err(_) => Ok(false),
1566        }
1567    }
1568
1569    /// Determines if an error is a "critical" external error that should be propagated
1570    ///
1571    /// ## Error Classification
1572    /// Outside errors (returned as `Err`):
1573    /// - ArchiveError: ZIP archive corruption or read errors
1574    /// - IOError: File system or read errors
1575    /// - MutexError: Thread synchronization errors
1576    /// - Utf8DecodeError: UTF-8 encoding errors
1577    /// - Utf16DecodeError: UTF-16 encoding errors
1578    /// - QuickXmlError: XML parser errors
1579    ///
1580    /// Irrelevant errors (returned as `Ok(false)`):
1581    /// - these errors could not have occurred in this situation.
1582    /// - EpubBuilderError
1583    /// - WalkDirError
1584    ///
1585    /// Content errors (returned as `Ok(false)`):
1586    /// - All other EpubError variants
1587    fn is_outside_error(err: &EpubError) -> bool {
1588        matches!(
1589            err,
1590            EpubError::ArchiveError { .. }
1591                | EpubError::IOError { .. }
1592                | EpubError::MutexError
1593                | EpubError::Utf8DecodeError { .. }
1594                | EpubError::Utf16DecodeError { .. }
1595                | EpubError::QuickXmlError { .. }
1596        )
1597    }
1598}
1599
1600#[cfg(test)]
1601mod tests {
1602    use std::{
1603        fs::File,
1604        io::BufReader,
1605        path::{Path, PathBuf},
1606    };
1607
1608    use crate::{epub::EpubDoc, error::EpubError, utils::XmlReader};
1609
1610    /// Section 3.3 package documents
1611    mod package_documents_tests {
1612        use std::{path::Path, sync::atomic::Ordering};
1613
1614        use crate::epub::{EpubDoc, EpubVersion};
1615
1616        /// ID: pkg-collections-unknown
1617        ///
1618        /// The package document contains a collection with an unknown role. The reading system must open the EPUB successfully.
1619        #[test]
1620        fn test_pkg_collections_unknown() {
1621            let epub_file = Path::new("./test_case/pkg-collections-unknown.epub");
1622            let doc = EpubDoc::new(epub_file);
1623            assert!(doc.is_ok());
1624        }
1625
1626        /// ID: pkg-creator-order
1627        ///
1628        /// Several creators are listed in the package document. The reading system must not display them out of order (but it may display only the first).
1629        #[test]
1630        fn test_pkg_creator_order() {
1631            let epub_file = Path::new("./test_case/pkg-creator-order.epub");
1632            let doc = EpubDoc::new(epub_file);
1633            assert!(doc.is_ok());
1634
1635            let doc = doc.unwrap();
1636            let creators = doc.get_metadata_value("creator");
1637            assert!(creators.is_some());
1638
1639            let creators = creators.unwrap();
1640            assert_eq!(creators.len(), 5);
1641            assert_eq!(
1642                creators,
1643                vec![
1644                    "Dave Cramer",
1645                    "Wendy Reid",
1646                    "Dan Lazin",
1647                    "Ivan Herman",
1648                    "Brady Duga",
1649                ]
1650            );
1651        }
1652
1653        /// ID: pkg-manifest-unknown
1654        ///
1655        /// The package document contains a manifest item with unknown properties. The reading system must open the EPUB successfully.
1656        #[test]
1657        fn test_pkg_manifest_order() {
1658            let epub_file = Path::new("./test_case/pkg-manifest-unknown.epub");
1659            let doc = EpubDoc::new(epub_file);
1660            assert!(doc.is_ok());
1661
1662            let doc = doc.unwrap();
1663            assert_eq!(doc.manifest.len(), 2);
1664            assert!(doc.get_manifest_item("nav").is_ok());
1665            assert!(doc.get_manifest_item("content_001").is_ok());
1666            assert!(doc.get_manifest_item("content_002").is_err());
1667        }
1668
1669        /// ID: pkg-meta-unknown
1670        ///
1671        /// The package document contains a meta tag with an unknown property. The reading system must open the EPUB successfully.
1672        #[test]
1673        fn test_pkg_meta_unknown() {
1674            let epub_file = Path::new("./test_case/pkg-meta-unknown.epub");
1675            let doc = EpubDoc::new(epub_file);
1676            assert!(doc.is_ok());
1677
1678            let doc = doc.unwrap();
1679            let value = doc.get_metadata_value("dcterms:isReferencedBy");
1680            assert!(value.is_some());
1681            let value = value.unwrap();
1682            assert_eq!(value.len(), 1);
1683            assert_eq!(
1684                value,
1685                vec!["https://www.w3.org/TR/epub-rs/#confreq-rs-pkg-meta-unknown"]
1686            );
1687
1688            let value = doc.get_metadata_value("dcterms:modified");
1689            assert!(value.is_some());
1690            let value = value.unwrap();
1691            assert_eq!(value.len(), 1);
1692            assert_eq!(value, vec!["2021-01-11T00:00:00Z"]);
1693
1694            let value = doc.get_metadata_value("dcterms:title");
1695            assert!(value.is_none());
1696        }
1697
1698        /// ID: pkg-meta-whitespace
1699        ///
1700        /// The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases.
1701        #[test]
1702        fn test_pkg_meta_white_space() {
1703            let epub_file = Path::new("./test_case/pkg-meta-whitespace.epub");
1704            let doc = EpubDoc::new(epub_file);
1705            assert!(doc.is_ok());
1706
1707            let doc = doc.unwrap();
1708            let value = doc.get_metadata_value("creator");
1709            assert!(value.is_some());
1710            let value = value.unwrap();
1711            assert_eq!(value.len(), 1);
1712            assert_eq!(value, vec!["Dave Cramer"]);
1713
1714            let value = doc.get_metadata_value("description");
1715            assert!(value.is_some());
1716            let value = value.unwrap();
1717            assert_eq!(value.len(), 1);
1718            assert_eq!(
1719                value,
1720                vec![
1721                    "The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases."
1722                ]
1723            );
1724        }
1725
1726        /// ID: pkg-spine-duplicate-item-hyperlink
1727        ///
1728        /// The spine contains several references to the same content document. The reading system must move to the position of the first duplicate in the reading order when following a hyperlink.
1729        #[test]
1730        fn test_pkg_spine_duplicate_item_hyperlink() {
1731            let epub_file = Path::new("./test_case/pkg-spine-duplicate-item-hyperlink.epub");
1732            let doc = EpubDoc::new(epub_file);
1733            assert!(doc.is_ok());
1734
1735            let doc = doc.unwrap();
1736            assert_eq!(doc.spine.len(), 4);
1737            assert_eq!(
1738                doc.navigate_by_spine_index(0).unwrap(),
1739                doc.get_manifest_item("content_001").unwrap()
1740            );
1741            assert_eq!(
1742                doc.navigate_by_spine_index(1).unwrap(),
1743                doc.get_manifest_item("content_002").unwrap()
1744            );
1745            assert_eq!(
1746                doc.navigate_by_spine_index(2).unwrap(),
1747                doc.get_manifest_item("content_002").unwrap()
1748            );
1749            assert_eq!(
1750                doc.navigate_by_spine_index(3).unwrap(),
1751                doc.get_manifest_item("content_002").unwrap()
1752            );
1753        }
1754
1755        /// ID: pkg-spine-duplicate-item-rendering
1756        ///
1757        /// The spine contains several references to the same content document. The reading system must not skip the duplicates when rendering the reading order.
1758        #[test]
1759        fn test_pkg_spine_duplicate_item_rendering() {
1760            let epub_file = Path::new("./test_case/pkg-spine-duplicate-item-rendering.epub");
1761            let doc = EpubDoc::new(epub_file);
1762            assert!(doc.is_ok());
1763
1764            let doc = doc.unwrap();
1765            assert_eq!(doc.spine.len(), 4);
1766
1767            let result = doc.spine_prev();
1768            assert!(result.is_none());
1769
1770            let result = doc.spine_next();
1771            assert!(result.is_some());
1772
1773            doc.spine_next();
1774            doc.spine_next();
1775            let result = doc.spine_next();
1776            assert!(result.is_none());
1777        }
1778
1779        /// ID: pkg-spine-nonlinear-activation
1780        ///
1781        /// An itemref in the spine is marked as non-linear. Although it (possibly) cannot be accessed through the table of contents, it can be reached from a link in the XHTML content.
1782        #[test]
1783        fn test_pkg_spine_nonlinear_activation() {
1784            let epub_file = Path::new("./test_case/pkg-spine-nonlinear-activation.epub");
1785            let doc = EpubDoc::new(epub_file);
1786            assert!(doc.is_ok());
1787
1788            let doc = doc.unwrap();
1789            assert!(doc.spine_prev().is_none());
1790            assert!(doc.spine_next().is_none());
1791
1792            assert!(doc.navigate_by_spine_index(1).is_some());
1793            assert!(doc.spine_prev().is_none());
1794            assert!(doc.spine_next().is_none());
1795        }
1796
1797        /// ID: pkg-spine-order
1798        ///
1799        /// Basic test of whether a reading system can display spine items in the correct order. The test fails if the reading system presents content in the order in which the file names sort, or if it presents files in manifest order rather than spine order.
1800        #[test]
1801        fn test_pkg_spine_order() {
1802            let epub_file = Path::new("./test_case/pkg-spine-order.epub");
1803            let doc = EpubDoc::new(epub_file);
1804            assert!(doc.is_ok());
1805
1806            let doc = doc.unwrap();
1807            assert_eq!(doc.spine.len(), 4);
1808            assert_eq!(
1809                doc.spine
1810                    .iter()
1811                    .map(|item| item.idref.clone())
1812                    .collect::<Vec<String>>(),
1813                vec![
1814                    "d-content_001",
1815                    "c-content_002",
1816                    "b-content_003",
1817                    "a-content_004",
1818                ]
1819            );
1820        }
1821
1822        /// ID: pkg-spine-order-svg
1823        ///
1824        /// Basic test of whether a reading system can display SVG spine items in the correct order.
1825        #[test]
1826        fn test_spine_order_svg() {
1827            let epub_file = Path::new("./test_case/pkg-spine-order-svg.epub");
1828            let doc = EpubDoc::new(epub_file);
1829            assert!(doc.is_ok());
1830
1831            let doc = doc.unwrap();
1832            assert_eq!(doc.spine.len(), 4);
1833
1834            loop {
1835                if let Some(spine) = doc.spine_next() {
1836                    let idref = doc.spine[doc.current_spine_index.load(Ordering::Relaxed)]
1837                        .idref
1838                        .clone();
1839                    let resource = doc.get_manifest_item(&idref);
1840                    assert!(resource.is_ok());
1841
1842                    let resource = resource.unwrap();
1843                    assert_eq!(spine, resource);
1844                } else {
1845                    break;
1846                }
1847            }
1848
1849            assert_eq!(doc.current_spine_index.load(Ordering::Relaxed), 3);
1850        }
1851
1852        /// ID: pkg-spine-unknown
1853        ///
1854        /// The package document contains a spine item with unknown properties. The reading system must open the EPUB successfully.
1855        #[test]
1856        fn test_pkg_spine_unknown() {
1857            let epub_file = Path::new("./test_case/pkg-spine-unknown.epub");
1858            let doc = EpubDoc::new(epub_file);
1859            assert!(doc.is_ok());
1860
1861            let doc = doc.unwrap();
1862            assert_eq!(doc.spine.len(), 1);
1863            assert_eq!(doc.spine[0].idref, "content_001");
1864            assert_eq!(doc.spine[0].id, None);
1865            assert_eq!(doc.spine[0].linear, true);
1866            assert_eq!(doc.spine[0].properties, Some("untrustworthy".to_string()));
1867        }
1868
1869        /// ID: pkg-title-order
1870        ///
1871        /// Several titles are listed in the package document. The reading system must use the first title (and whether to use other titles is not defined).
1872        #[test]
1873        fn test_pkg_title_order() {
1874            let epub_file = Path::new("./test_case/pkg-title-order.epub");
1875            let doc = EpubDoc::new(epub_file);
1876            assert!(doc.is_ok());
1877
1878            let doc = doc.unwrap();
1879            let title_list = doc.get_title();
1880            assert_eq!(title_list.len(), 6);
1881            assert_eq!(
1882                title_list,
1883                vec![
1884                    "pkg-title-order",
1885                    "This title must not display first",
1886                    "Also, this title must not display first",
1887                    "This title also must not display first",
1888                    "This title must also not display first",
1889                    "This title must not display first, also",
1890                ]
1891            );
1892        }
1893
1894        /// ID: pkg-unique-id
1895        ///
1896        /// The package document's dc:identifier is identical across two publications. The reading system should display both publications independently.
1897        #[test]
1898        fn test_pkg_unique_id() {
1899            let epub_file = Path::new("./test_case/pkg-unique-id.epub");
1900            let doc_1 = EpubDoc::new(epub_file);
1901            assert!(doc_1.is_ok());
1902
1903            let epub_file = Path::new("./test_case/pkg-unique-id_duplicate.epub");
1904            let doc_2 = EpubDoc::new(epub_file);
1905            assert!(doc_2.is_ok());
1906
1907            let doc_1 = doc_1.unwrap();
1908            let doc_2 = doc_2.unwrap();
1909
1910            assert_eq!(doc_1.get_identifier(), doc_2.get_identifier());
1911            assert_eq!(doc_1.unique_identifier, "pkg-unique-id");
1912            assert_eq!(doc_2.unique_identifier, "pkg-unique-id");
1913        }
1914
1915        /// ID: pkg-version-backward
1916        ///
1917        /// “Reading Systems MUST attempt to process an EPUB Publication whose Package Document version attribute is less than "3.0"”. This is an EPUB with package version attribute set to "0", to see if a reading system will open it.
1918        #[test]
1919        fn test_pkg_version_backward() {
1920            let epub_file = Path::new("./test_case/pkg-version-backward.epub");
1921            let doc = EpubDoc::new(epub_file);
1922            assert!(doc.is_ok());
1923
1924            let doc = doc.unwrap();
1925            assert_eq!(doc.version, EpubVersion::Version3_0);
1926        }
1927
1928        /// ID: pkg-linked-records
1929        ///
1930        /// Reading System must process and display the title and creator metadata from the package document. An ONIX 3.0 format linked metadata record exists, but contains neither title nor creator metadata.
1931        #[test]
1932        fn test_pkg_linked_records() {
1933            let epub_file = Path::new("./test_case/pkg-linked-records.epub");
1934            let doc = EpubDoc::new(epub_file);
1935            assert!(doc.is_ok());
1936
1937            let doc = doc.unwrap();
1938            assert_eq!(doc.metadata_link.len(), 3);
1939
1940            let item = doc.metadata_link.iter().find(|&item| {
1941                if let Some(properties) = &item.properties {
1942                    properties.eq("onix")
1943                } else {
1944                    false
1945                }
1946            });
1947            assert!(item.is_some());
1948        }
1949
1950        /// ID: pkg-manifest-unlisted-resource
1951        ///
1952        /// The XHTML content references an image that does not appear in the manifest. The image should not be shown.
1953        #[test]
1954        fn test_pkg_manifest_unlisted_resource() {
1955            let epub_file = Path::new("./test_case/pkg-manifest-unlisted-resource.epub");
1956            let doc = EpubDoc::new(epub_file);
1957            assert!(doc.is_ok());
1958
1959            let doc = doc.unwrap();
1960            assert!(
1961                doc.get_manifest_item_by_path("EPUB/content_001.xhtml")
1962                    .is_ok()
1963            );
1964
1965            assert!(doc.get_manifest_item_by_path("EPUB/red.png").is_err());
1966            let err = doc.get_manifest_item_by_path("EPUB/red.png").unwrap_err();
1967            assert_eq!(
1968                err.to_string(),
1969                "Resource not found: Unable to find resource from \"EPUB/red.png\"."
1970            );
1971        }
1972    }
1973
1974    /// Section 3.4 manifest fallbacks
1975    ///
1976    /// The tests under this module seem to favor the reading system rather than the EPUB format itself
1977    mod manifest_fallbacks_tests {
1978        use std::path::Path;
1979
1980        use crate::epub::EpubDoc;
1981
1982        /// ID: pub-foreign_bad-fallback
1983        ///
1984        /// This is a test of manifest fallbacks where both the spine item and the fallback are likely to be unsupported. The spine item is a DMG, with a fallback to a PSD file. Reading systems may raise an error on the ingenstion workflow.
1985        #[test]
1986        fn test_pub_foreign_bad_fallback() {
1987            let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
1988            let doc = EpubDoc::new(epub_file);
1989            assert!(doc.is_ok());
1990
1991            let doc = doc.unwrap();
1992            assert!(doc.get_manifest_item("content_001").is_ok());
1993            assert!(doc.get_manifest_item("bar").is_ok());
1994
1995            assert_eq!(
1996                doc.get_manifest_item_with_fallback("content_001", &vec!["application/xhtml+xml"])
1997                    .unwrap_err()
1998                    .to_string(),
1999                "No supported file format: The fallback resource does not contain the file format you support."
2000            );
2001        }
2002
2003        /// ID: pub-foreign_image
2004        ///
2005        /// An HTML content file contains a PSD image, with a manifest fallback to a PNG image. This tests fallbacks for resources that are not in the spine.
2006        #[test]
2007        fn test_pub_foreign_image() {
2008            let epub_file = Path::new("./test_case/pub-foreign_image.epub");
2009            let doc = EpubDoc::new(epub_file);
2010            assert!(doc.is_ok());
2011
2012            let doc = doc.unwrap();
2013            let result = doc.get_manifest_item_with_fallback(
2014                "image-tiff",
2015                &vec!["image/png", "application/xhtml+xml"],
2016            );
2017            assert!(result.is_ok());
2018
2019            let (_, mime) = result.unwrap();
2020            assert_eq!(mime, "image/png");
2021        }
2022
2023        /// ID: pub-foreign_json-spine
2024        ///
2025        /// This EPUB uses a JSON content file in the spine, with a manifest fallback to an HTML document. If the reading system does not support JSON, it should display the HTML.
2026        #[test]
2027        fn test_pub_foreign_json_spine() {
2028            let epub_file = Path::new("./test_case/pub-foreign_json-spine.epub");
2029            let doc = EpubDoc::new(epub_file);
2030            assert!(doc.is_ok());
2031
2032            let doc = doc.unwrap();
2033            let result = doc.get_manifest_item_with_fallback(
2034                "content_primary",
2035                &vec!["application/xhtml+xml", "application/json"],
2036            );
2037            assert!(result.is_ok());
2038            let (_, mime) = result.unwrap();
2039            assert_eq!(mime, "application/json");
2040
2041            let result = doc
2042                .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
2043            assert!(result.is_ok());
2044            let (_, mime) = result.unwrap();
2045            assert_eq!(mime, "application/xhtml+xml");
2046        }
2047
2048        /// ID: pub-foreign_xml-spine
2049        ///
2050        /// This EPUB uses an ordinary XML content file with mimetype application/xml in the spine, with a manifest fallback to an HTML document. If the reading system does not support XML, it should display the HTML.
2051        #[test]
2052        fn test_pub_foreign_xml_spine() {
2053            let epub_file = Path::new("./test_case/pub-foreign_xml-spine.epub");
2054            let doc = EpubDoc::new(epub_file);
2055            assert!(doc.is_ok());
2056
2057            let doc = doc.unwrap();
2058            let result = doc.get_manifest_item_with_fallback(
2059                "content_primary",
2060                &vec!["application/xhtml+xml", "application/xml"],
2061            );
2062            assert!(result.is_ok());
2063            let (_, mime) = result.unwrap();
2064            assert_eq!(mime, "application/xml");
2065
2066            let result = doc
2067                .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
2068            assert!(result.is_ok());
2069            let (_, mime) = result.unwrap();
2070            assert_eq!(mime, "application/xhtml+xml");
2071        }
2072
2073        /// ID: pub-foreign_xml-suffix-spine
2074        ///
2075        /// This EPUB uses an custom XML content file with mimetype application/dtc+xml in the spine, with a manifest fallback to an HTML document. If the reading system does not support XML, it should display the HTML.
2076        #[test]
2077        fn test_pub_foreign_xml_suffix_spine() {
2078            let epub_file = Path::new("./test_case/pub-foreign_xml-suffix-spine.epub");
2079            let doc = EpubDoc::new(epub_file);
2080            assert!(doc.is_ok());
2081
2082            let doc = doc.unwrap();
2083            let result = doc.get_manifest_item_with_fallback(
2084                "content_primary",
2085                &vec!["application/xhtml+xml", "application/dtc+xml"],
2086            );
2087            assert!(result.is_ok());
2088            let (_, mime) = result.unwrap();
2089            assert_eq!(mime, "application/dtc+xml");
2090
2091            let result = doc
2092                .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
2093            assert!(result.is_ok());
2094            let (_, mime) = result.unwrap();
2095            assert_eq!(mime, "application/xhtml+xml");
2096        }
2097    }
2098
2099    /// Section 3.9 open container format
2100    mod open_container_format_tests {
2101        use std::{cmp::min, io::Read, path::Path};
2102
2103        use sha1::{Digest, Sha1};
2104
2105        use crate::epub::EpubDoc;
2106
2107        /// ID: ocf-metainf-inc
2108        ///
2109        /// An extra configuration file, not in the reserved files' list, is added to the META-INF folder; this file must be ignored.
2110        #[test]
2111        fn test_ocf_metainf_inc() {
2112            let epub_file = Path::new("./test_case/ocf-metainf-inc.epub");
2113            let doc = EpubDoc::new(epub_file);
2114            assert!(doc.is_ok());
2115        }
2116
2117        /// ID: ocf-metainf-manifest
2118        ///
2119        /// An ancillary manifest file, containing an extra spine item, is present in the META-INF directory; this extra item must be ignored by the reading system.
2120        #[test]
2121        fn test_ocf_metainf_manifest() {
2122            let epub_file = Path::new("./test_case/ocf-metainf-manifest.epub");
2123            let doc = EpubDoc::new(epub_file);
2124            assert!(doc.is_ok());
2125        }
2126
2127        /// ID: ocf-package_arbitrary
2128        ///
2129        /// The EPUB contains three valid package files and three corresponding sets of content documents, but only one of the packages, in an unusual subdirectory, is referenced by the container.xml file. The reading system must use this package.
2130        #[test]
2131        fn test_ocf_package_arbitrary() {
2132            let epub_file = Path::new("./test_case/ocf-package_arbitrary.epub");
2133            let doc = EpubDoc::new(epub_file);
2134            assert!(doc.is_ok());
2135
2136            let doc = doc.unwrap();
2137            assert_eq!(doc.package_path, Path::new("FOO/BAR/package.opf"));
2138        }
2139
2140        /// ID: ocf-package_multiple
2141        ///
2142        /// The EPUB contains three valid package files and three corresponding sets of content documents, all referenced by the container.xml file. The reading system must use the first package.
2143        #[test]
2144        fn test_ocf_package_multiple() {
2145            let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
2146            let doc = EpubDoc::new(epub_file);
2147            assert!(doc.is_ok());
2148
2149            let doc = doc.unwrap();
2150            assert_eq!(doc.package_path, Path::new("FOO/BAR/package.opf"));
2151            assert_eq!(doc.base_path, Path::new("FOO/BAR"));
2152        }
2153
2154        /// ID: ocf-url_link-leaking-relative
2155        ///
2156        /// Use a relative link with several double-dot path segments from the content to a photograph. The folder hierarchy containing the photograph starts at the root level; the relative image reference exceeds depth of hierarchy.
2157        #[test]
2158        fn test_ocf_url_link_leaking_relative() {
2159            let epub_file = Path::new("./test_case/ocf-url_link-leaking-relative.epub");
2160            let doc = EpubDoc::new(epub_file);
2161            assert!(doc.is_err());
2162            assert_eq!(
2163                doc.err().unwrap().to_string(),
2164                String::from(
2165                    "Relative link leakage: Path \"../../../../media/imgs/monastery.jpg\" is out of container range."
2166                )
2167            )
2168        }
2169
2170        /// ID: ocf-url_link-path-absolute
2171        ///
2172        /// Use a path-absolute link, i.e., beginning with a leading slash, from the content to a photograph. The folder hierarchy containing the photograph starts at the root level.
2173        #[test]
2174        fn test_ocf_url_link_path_absolute() {
2175            let epub_file = Path::new("./test_case/ocf-url_link-path-absolute.epub");
2176            let doc = EpubDoc::new(epub_file);
2177            assert!(doc.is_ok());
2178
2179            let doc = doc.unwrap();
2180            let resource = doc.manifest.get("photo").unwrap();
2181            assert_eq!(resource.path, Path::new("media/imgs/monastery.jpg"));
2182        }
2183
2184        /// ID: ocf-url_link-relative
2185        ///
2186        /// A simple relative link from the content to a photograph. The folder hierarchy containing the photograph starts at the root level.
2187        #[test]
2188        fn test_ocf_url_link_relative() {
2189            let epub_file = Path::new("./test_case/ocf-url_link-relative.epub");
2190            let doc = EpubDoc::new(epub_file);
2191            assert!(doc.is_ok());
2192
2193            let doc = doc.unwrap();
2194            let resource = doc.manifest.get("photo").unwrap();
2195            assert_eq!(resource.path, Path::new("media/imgs/monastery.jpg"));
2196        }
2197
2198        /// ID: ocf-url_manifest
2199        ///
2200        /// The manifest refers to an XHTML file in an arbitrary subfolder. The reading system must be able to find the content.
2201        #[test]
2202        fn test_ocf_url_manifest() {
2203            let epub_file = Path::new("./test_case/ocf-url_manifest.epub");
2204            let doc = EpubDoc::new(epub_file);
2205            assert!(doc.is_ok());
2206
2207            let doc = doc.unwrap();
2208            assert!(doc.get_manifest_item("nav").is_ok());
2209            assert!(doc.get_manifest_item("content_001").is_ok());
2210            assert!(doc.get_manifest_item("content_002").is_err());
2211        }
2212
2213        /// ID: ocf-url_relative
2214        ///
2215        /// The manifest refers to an XHTML file in an arbitrary subfolder that is relative to the package's own arbitrary folder. The reading system must be able to find the content.
2216        #[test]
2217        fn test_ocf_url_relative() {
2218            let epub_file = Path::new("./test_case/ocf-url_relative.epub");
2219            let doc = EpubDoc::new(epub_file);
2220            assert!(doc.is_ok());
2221
2222            let doc = doc.unwrap();
2223            assert_eq!(doc.package_path, Path::new("foo/BAR/baz.opf"));
2224            assert_eq!(doc.base_path, Path::new("foo/BAR"));
2225            assert_eq!(
2226                doc.manifest.get("nav").unwrap().path,
2227                Path::new("foo/BAR/nav.xhtml")
2228            );
2229            assert_eq!(
2230                doc.manifest.get("content_001").unwrap().path,
2231                Path::new("foo/BAR/qux/content_001.xhtml")
2232            );
2233            assert!(doc.get_manifest_item("nav").is_ok());
2234            assert!(doc.get_manifest_item("content_001").is_ok());
2235        }
2236
2237        /// ID: ocf-zip-comp
2238        ///
2239        /// MUST treat any OCF ZIP container that uses compression techniques other than Deflate as in error.
2240        /// This test case does not use compression methods other than Deflate and cannot detect whether it is effective.
2241        #[test]
2242        fn test_ocf_zip_comp() {
2243            let epub_file = Path::new("./test_case/ocf-zip-comp.epub");
2244            let doc = EpubDoc::new(epub_file);
2245            assert!(doc.is_ok());
2246        }
2247
2248        /// ID: ocf-zip-mult
2249        ///
2250        /// MUST treat any OCF ZIP container that splits the content into segments as in error.
2251        /// This test case is not a segmented OCF ZIP container and cannot be tested to see if it is valid.
2252        #[test]
2253        fn test_ocf_zip_mult() {
2254            let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2255            let doc = EpubDoc::new(epub_file);
2256            assert!(doc.is_ok());
2257        }
2258
2259        /// ID: ocf-font_obfuscation
2260        ///
2261        /// An obfuscated (TrueType) font should be displayed after de-obfuscation.
2262        #[test]
2263        fn test_ocf_font_obfuscation() {
2264            let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2265            let doc = EpubDoc::new(epub_file);
2266            assert!(doc.is_ok());
2267
2268            let doc = doc.unwrap();
2269            let unique_id = doc.unique_identifier.clone();
2270
2271            let mut hasher = Sha1::new();
2272            hasher.update(unique_id.as_bytes());
2273            let hash = hasher.finalize();
2274            let mut key = vec![0u8; 1040];
2275            for i in 0..1040 {
2276                key[i] = hash[i % hash.len()];
2277            }
2278
2279            assert!(doc.encryption.is_some());
2280            assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2281
2282            let data = &doc.encryption.unwrap()[0];
2283            assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2284
2285            let font_file = doc
2286                .archive
2287                .lock()
2288                .unwrap()
2289                .by_name(&data.data)
2290                .unwrap()
2291                .bytes()
2292                .collect::<Result<Vec<u8>, _>>();
2293            assert!(font_file.is_ok());
2294            let font_file = font_file.unwrap();
2295
2296            // 根据EPUB规范,字体混淆是直接对字体文件进行的,不需要解压步骤,直接进行去混淆处理
2297            let mut deobfuscated = font_file.clone();
2298            for i in 0..min(1040, deobfuscated.len()) {
2299                deobfuscated[i] ^= key[i];
2300            }
2301
2302            assert!(is_valid_font(&deobfuscated));
2303        }
2304
2305        /// ID: ocf-font_obfuscation-bis
2306        ///
2307        /// An obfuscated (TrueType) font should not be displayed after de-obfuscation, because the obfuscation used a different publication id.
2308        #[test]
2309        fn test_ocf_font_obfuscation_bis() {
2310            let epub_file = Path::new("./test_case/ocf-font_obfuscation_bis.epub");
2311            let doc = EpubDoc::new(epub_file);
2312            assert!(doc.is_ok());
2313
2314            let doc = doc.unwrap();
2315
2316            let wrong_unique_id = "wrong-publication-id";
2317            let mut hasher = Sha1::new();
2318            hasher.update(wrong_unique_id.as_bytes());
2319            let hash = hasher.finalize();
2320            let mut wrong_key = vec![0u8; 1040];
2321            for i in 0..1040 {
2322                wrong_key[i] = hash[i % hash.len()];
2323            }
2324
2325            assert!(doc.encryption.is_some());
2326            assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2327
2328            let data = &doc.encryption.unwrap()[0];
2329            assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2330
2331            let font_file = doc
2332                .archive
2333                .lock()
2334                .unwrap()
2335                .by_name(&data.data)
2336                .unwrap()
2337                .bytes()
2338                .collect::<Result<Vec<u8>, _>>();
2339            assert!(font_file.is_ok());
2340            let font_file = font_file.unwrap();
2341
2342            // 使用错误的密钥进行去混淆
2343            let mut deobfuscated_with_wrong_key = font_file.clone();
2344            for i in 0..std::cmp::min(1040, deobfuscated_with_wrong_key.len()) {
2345                deobfuscated_with_wrong_key[i] ^= wrong_key[i];
2346            }
2347
2348            assert!(!is_valid_font(&deobfuscated_with_wrong_key));
2349        }
2350
2351        fn is_valid_font(data: &[u8]) -> bool {
2352            if data.len() < 4 {
2353                return false;
2354            }
2355            let sig = &data[0..4];
2356            // OTF: "OTTO"
2357            // TTF: 0x00010000, 0x00020000, "true", "typ1"
2358            sig == b"OTTO"
2359                || sig == b"\x00\x01\x00\x00"
2360                || sig == b"\x00\x02\x00\x00"
2361                || sig == b"true"
2362                || sig == b"typ1"
2363        }
2364    }
2365
2366    #[test]
2367    fn test_parse_container() {
2368        let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2369        let doc = EpubDoc::new(epub_file);
2370        assert!(doc.is_ok());
2371
2372        // let doc = doc.unwrap();
2373        let container = r#"
2374        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2375            <rootfiles></rootfiles>
2376        </container>
2377        "#
2378        .to_string();
2379
2380        let result = EpubDoc::<BufReader<File>>::parse_container(container);
2381        assert!(result.is_err());
2382        assert_eq!(
2383            result.unwrap_err(),
2384            EpubError::NonCanonicalFile { tag: "rootfile".to_string() }
2385        );
2386
2387        let container = r#"
2388        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2389            <rootfiles>
2390                <rootfile media-type="application/oebps-package+xml"/>
2391            </rootfiles>
2392        </container>
2393        "#
2394        .to_string();
2395
2396        let result = EpubDoc::<BufReader<File>>::parse_container(container);
2397        assert!(result.is_err());
2398        assert_eq!(
2399            result.unwrap_err(),
2400            EpubError::MissingRequiredAttribute {
2401                tag: "rootfile".to_string(),
2402                attribute: "full-path".to_string(),
2403            }
2404        );
2405
2406        let container = r#"
2407        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2408            <rootfiles>
2409                <rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
2410            </rootfiles>
2411        </container>
2412        "#
2413        .to_string();
2414
2415        let result = EpubDoc::<BufReader<File>>::parse_container(container);
2416        assert!(result.is_ok());
2417        assert_eq!(result.unwrap(), PathBuf::from("EPUB/content.opf"))
2418    }
2419
2420    #[test]
2421    fn test_parse_manifest() {
2422        let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
2423        let doc = EpubDoc::new(epub_file);
2424        assert!(doc.is_ok());
2425
2426        let manifest = r#"
2427        <manifest>
2428            <item href="content_001.xhtml" media-type="application/xhtml+xml"/>
2429            <item properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2430        </manifest>
2431        "#;
2432        let mut doc = doc.unwrap();
2433        let element = XmlReader::parse(manifest);
2434        assert!(element.is_ok());
2435
2436        let element = element.unwrap();
2437        let result = doc.parse_manifest(&element);
2438        assert!(result.is_err());
2439        assert_eq!(
2440            result.unwrap_err(),
2441            EpubError::MissingRequiredAttribute {
2442                tag: "item".to_string(),
2443                attribute: "id".to_string(),
2444            },
2445        );
2446
2447        let manifest = r#"
2448        <manifest>
2449            <item id="content_001" media-type="application/xhtml+xml"/>
2450            <item id="nav" properties="nav" media-type="application/xhtml+xml"/>
2451        </manifest>
2452        "#;
2453        let element = XmlReader::parse(manifest);
2454        assert!(element.is_ok());
2455
2456        let element = element.unwrap();
2457        let result = doc.parse_manifest(&element);
2458        assert!(result.is_err());
2459        assert_eq!(
2460            result.unwrap_err(),
2461            EpubError::MissingRequiredAttribute {
2462                tag: "item".to_string(),
2463                attribute: "href".to_string(),
2464            },
2465        );
2466
2467        let manifest = r#"
2468        <manifest>
2469            <item id="content_001" href="content_001.xhtml"/>
2470            <item id="nav" properties="nav" href="nav.xhtml"/>
2471        </manifest>
2472        "#;
2473        let element = XmlReader::parse(manifest);
2474        assert!(element.is_ok());
2475
2476        let element = element.unwrap();
2477        let result = doc.parse_manifest(&element);
2478        assert!(result.is_err());
2479        assert_eq!(
2480            result.unwrap_err(),
2481            EpubError::MissingRequiredAttribute {
2482                tag: "item".to_string(),
2483                attribute: "media-type".to_string(),
2484            },
2485        );
2486
2487        let manifest = r#"
2488        <manifest>
2489            <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
2490            <item id="nav" properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2491        </manifest>
2492        "#;
2493        let element = XmlReader::parse(manifest);
2494        assert!(element.is_ok());
2495
2496        let element = element.unwrap();
2497        let result = doc.parse_manifest(&element);
2498        assert!(result.is_ok());
2499    }
2500
2501    /// Test for function `has_encryption`
2502    #[test]
2503    fn test_fn_has_encryption() {
2504        let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2505        let doc = EpubDoc::new(epub_file);
2506        assert!(doc.is_ok());
2507
2508        let doc = doc.unwrap();
2509        assert!(doc.has_encryption());
2510    }
2511
2512    /// This test is used to detect whether the "META-INF/encryption.xml" file is parsed correctly
2513    #[test]
2514    fn test_fn_parse_encryption() {
2515        let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2516        let doc = EpubDoc::new(epub_file);
2517        assert!(doc.is_ok());
2518
2519        let doc = doc.unwrap();
2520        assert!(doc.encryption.is_some());
2521
2522        let encryption = doc.encryption.unwrap();
2523        assert_eq!(encryption.len(), 1);
2524        assert_eq!(encryption[0].method, "http://www.idpf.org/2008/embedding");
2525        assert_eq!(encryption[0].data, "EPUB/fonts/Lobster.ttf");
2526    }
2527
2528    #[test]
2529    fn test_get_metadata_existing_key() {
2530        let epub_file = Path::new("./test_case/epub-33.epub");
2531        let doc = EpubDoc::new(epub_file);
2532        assert!(doc.is_ok());
2533
2534        let doc = doc.unwrap();
2535
2536        let titles = doc.get_metadata("title");
2537        assert!(titles.is_some());
2538
2539        let titles = titles.unwrap();
2540        assert_eq!(titles.len(), 1);
2541        assert_eq!(titles[0].property, "title");
2542        assert_eq!(titles[0].value, "EPUB 3.3");
2543
2544        let languages = doc.get_metadata("language");
2545        assert!(languages.is_some());
2546
2547        let languages = languages.unwrap();
2548        assert_eq!(languages.len(), 1);
2549        assert_eq!(languages[0].property, "language");
2550        assert_eq!(languages[0].value, "en-us");
2551
2552        let language = doc.get_language();
2553        assert_eq!(language, vec!["en-us"]);
2554    }
2555
2556    #[test]
2557    fn test_get_metadata_nonexistent_key() {
2558        let epub_file = Path::new("./test_case/epub-33.epub");
2559        let doc = EpubDoc::new(epub_file);
2560        assert!(doc.is_ok());
2561
2562        let doc = doc.unwrap();
2563        let metadata = doc.get_metadata("nonexistent");
2564        assert!(metadata.is_none());
2565    }
2566
2567    #[test]
2568    fn test_get_metadata_multiple_items_same_type() {
2569        let epub_file = Path::new("./test_case/epub-33.epub");
2570        let doc = EpubDoc::new(epub_file);
2571        assert!(doc.is_ok());
2572
2573        let doc = doc.unwrap();
2574
2575        let creators = doc.get_metadata("creator");
2576        assert!(creators.is_some());
2577
2578        let creators = creators.unwrap();
2579        assert_eq!(creators.len(), 3);
2580
2581        assert_eq!(creators[0].id, Some("creator_id_0".to_string()));
2582        assert_eq!(creators[0].property, "creator");
2583        assert_eq!(creators[0].value, "Matt Garrish, DAISY Consortium");
2584
2585        assert_eq!(creators[1].id, Some("creator_id_1".to_string()));
2586        assert_eq!(creators[1].property, "creator");
2587        assert_eq!(creators[1].value, "Ivan Herman, W3C");
2588
2589        assert_eq!(creators[2].id, Some("creator_id_2".to_string()));
2590        assert_eq!(creators[2].property, "creator");
2591        assert_eq!(creators[2].value, "Dave Cramer, Invited Expert");
2592    }
2593
2594    #[test]
2595    fn test_get_metadata_with_refinement() {
2596        let epub_file = Path::new("./test_case/epub-33.epub");
2597        let doc = EpubDoc::new(epub_file);
2598        assert!(doc.is_ok());
2599
2600        let doc = doc.unwrap();
2601
2602        let title = doc.get_metadata("title");
2603        assert!(title.is_some());
2604
2605        let title = title.unwrap();
2606        assert_eq!(title.len(), 1);
2607        assert_eq!(title[0].refined.len(), 1);
2608        assert_eq!(title[0].refined[0].property, "title-type");
2609        assert_eq!(title[0].refined[0].value, "main");
2610    }
2611
2612    #[test]
2613    fn test_get_manifest_item_with_fallback() {
2614        let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
2615        let doc = EpubDoc::new(epub_file);
2616        assert!(doc.is_ok());
2617
2618        let doc = doc.unwrap();
2619        assert!(doc.get_manifest_item("content_001").is_ok());
2620        assert!(doc.get_manifest_item("bar").is_ok());
2621
2622        // 当回退链上存在可回退资源时能获取资源
2623        if let Ok((_, mime)) =
2624            doc.get_manifest_item_with_fallback("content_001", &vec!["image/psd"])
2625        {
2626            assert_eq!(mime, "image/psd");
2627        } else {
2628            assert!(false, "get_manifest_item_with_fallback failed");
2629        }
2630
2631        // 当回退链上不存在可回退资源时无法获取资源
2632        assert_eq!(
2633            doc.get_manifest_item_with_fallback("content_001", &vec!["application/xhtml+xml"])
2634                .unwrap_err()
2635                .to_string(),
2636            "No supported file format: The fallback resource does not contain the file format you support."
2637        );
2638    }
2639
2640    #[test]
2641    fn test_get_cover() {
2642        let epub_file = Path::new("./test_case/pkg-cover-image.epub");
2643        let doc = EpubDoc::new(epub_file);
2644        if let Err(err) = &doc {
2645            println!("{}", err);
2646        }
2647        assert!(doc.is_ok());
2648
2649        let doc = doc.unwrap();
2650        let result = doc.get_cover();
2651        assert!(result.is_some());
2652
2653        let (data, mime) = result.unwrap();
2654        assert_eq!(data.len(), 5785);
2655        assert_eq!(mime, "image/jpeg");
2656    }
2657
2658    #[test]
2659    fn test_epub_2() {
2660        let epub_file = Path::new("./test_case/epub-2.epub");
2661        let doc = EpubDoc::new(epub_file);
2662        assert!(doc.is_ok());
2663
2664        let doc = doc.unwrap();
2665
2666        let titles = doc.get_title();
2667        assert_eq!(titles, vec!["Minimal EPUB 2.0"]);
2668    }
2669
2670    #[test]
2671    fn test_is_valid_epub_valid_file() {
2672        let result = EpubDoc::is_valid_epub("./test_case/epub-2.epub");
2673        assert!(result.is_ok());
2674        assert_eq!(result.unwrap(), true);
2675    }
2676
2677    #[test]
2678    fn test_is_valid_epub_invalid_path() {
2679        let result = EpubDoc::is_valid_epub("./test_case/nonexistent.epub");
2680        assert!(result.is_err());
2681    }
2682
2683    #[test]
2684    fn test_is_valid_epub_corrupted_zip() {
2685        let temp_dir = std::env::temp_dir();
2686        let corrupted_file = temp_dir.join("corrupted.epub");
2687
2688        std::fs::write(&corrupted_file, b"not a valid zip file").unwrap();
2689
2690        let result = EpubDoc::is_valid_epub(&corrupted_file);
2691
2692        assert!(result.is_err());
2693        let err = result.unwrap_err();
2694        assert!(matches!(err, EpubError::ArchiveError { .. }));
2695
2696        std::fs::remove_file(corrupted_file).ok();
2697    }
2698
2699    #[test]
2700    fn test_is_valid_epub_valid_epub_3() {
2701        let result = EpubDoc::is_valid_epub("./test_case/epub-33.epub");
2702        assert!(result.is_ok());
2703        assert_eq!(result.unwrap(), true);
2704    }
2705
2706    #[test]
2707    fn test_is_outside_error() {
2708        let archive_error = EpubError::ArchiveError {
2709            source: zip::result::ZipError::Io(std::io::Error::new(
2710                std::io::ErrorKind::Other,
2711                "test",
2712            )),
2713        };
2714        assert!(EpubDoc::<BufReader<File>>::is_outside_error(&archive_error));
2715
2716        let io_error = EpubError::IOError {
2717            source: std::io::Error::new(std::io::ErrorKind::NotFound, "test"),
2718        };
2719        assert!(EpubDoc::<BufReader<File>>::is_outside_error(&io_error));
2720
2721        let non_canonical = EpubError::NonCanonicalEpub { expected_file: "test".to_string() };
2722        assert!(!EpubDoc::<BufReader<File>>::is_outside_error(
2723            &non_canonical
2724        ));
2725
2726        let missing_attr = EpubError::MissingRequiredAttribute {
2727            tag: "test".to_string(),
2728            attribute: "id".to_string(),
2729        };
2730        assert!(!EpubDoc::<BufReader<File>>::is_outside_error(&missing_attr));
2731    }
2732
2733    mod metadata_sheet_tests {
2734        use crate::epub::EpubDoc;
2735        use std::path::Path;
2736
2737        #[test]
2738        fn test_get_metadata_sheet_basic_fields() {
2739            let epub_file = Path::new("./test_case/epub-33.epub");
2740            let doc = EpubDoc::new(epub_file);
2741            assert!(doc.is_ok());
2742
2743            let doc = doc.unwrap();
2744            let sheet = doc.get_metadata_sheet();
2745
2746            assert_eq!(sheet.title.len(), 1);
2747            assert_eq!(sheet.title[0], "EPUB 3.3");
2748
2749            assert_eq!(sheet.language.len(), 1);
2750            assert_eq!(sheet.language[0], "en-us");
2751
2752            assert_eq!(sheet.publisher, "World Wide Web Consortium");
2753
2754            assert_eq!(
2755                sheet.rights,
2756                "https://www.w3.org/Consortium/Legal/2015/doc-license"
2757            );
2758        }
2759
2760        #[test]
2761        fn test_get_metadata_sheet_multiple_creators() {
2762            let epub_file = Path::new("./test_case/epub-33.epub");
2763            let doc = EpubDoc::new(epub_file);
2764            assert!(doc.is_ok());
2765
2766            let doc = doc.unwrap();
2767            let sheet = doc.get_metadata_sheet();
2768
2769            assert_eq!(sheet.creator.len(), 3);
2770            assert_eq!(sheet.creator[0], "Matt Garrish, DAISY Consortium");
2771            assert_eq!(sheet.creator[1], "Ivan Herman, W3C");
2772            assert_eq!(sheet.creator[2], "Dave Cramer, Invited Expert");
2773        }
2774
2775        #[test]
2776        fn test_get_metadata_sheet_multiple_subjects() {
2777            let epub_file = Path::new("./test_case/epub-33.epub");
2778            let doc = EpubDoc::new(epub_file);
2779            assert!(doc.is_ok());
2780
2781            let doc = doc.unwrap();
2782            let sheet = doc.get_metadata_sheet();
2783
2784            assert_eq!(sheet.subject.len(), 2);
2785            assert_eq!(sheet.subject[0], "Information systems~World Wide Web");
2786            assert_eq!(
2787                sheet.subject[1],
2788                "General and reference~Computing standards, RFCs and guidelines"
2789            );
2790        }
2791
2792        #[test]
2793        fn test_get_metadata_sheet_identifier_with_id() {
2794            let epub_file = Path::new("./test_case/epub-33.epub");
2795            let doc = EpubDoc::new(epub_file);
2796            assert!(doc.is_ok());
2797
2798            let doc = doc.unwrap();
2799            let sheet = doc.get_metadata_sheet();
2800
2801            assert!(sheet.identifier.contains_key("pub-id"));
2802            assert_eq!(
2803                sheet.identifier.get("pub-id"),
2804                Some(&"https://www.w3.org/TR/epub-33/".to_string())
2805            );
2806        }
2807
2808        #[test]
2809        fn test_get_metadata_sheet_missing_scalar_fields() {
2810            let epub_file = Path::new("./test_case/epub-33.epub");
2811            let doc = EpubDoc::new(epub_file);
2812            assert!(doc.is_ok());
2813
2814            let doc = doc.unwrap();
2815            let sheet = doc.get_metadata_sheet();
2816
2817            assert!(sheet.coverage.is_empty());
2818            assert!(sheet.description.is_empty());
2819            assert!(sheet.format.is_empty());
2820            assert!(sheet.source.is_empty());
2821            assert!(sheet.epub_type.is_empty());
2822            assert!(sheet.contributor.is_empty());
2823            assert!(sheet.relation.is_empty());
2824        }
2825
2826        #[test]
2827        fn test_get_metadata_sheet_title_refinement_via_get_metadata() {
2828            let epub_file = Path::new("./test_case/epub-33.epub");
2829            let doc = EpubDoc::new(epub_file);
2830            assert!(doc.is_ok());
2831
2832            let doc = doc.unwrap();
2833            let title_metadata = doc.get_metadata("title");
2834            assert!(title_metadata.is_some());
2835
2836            let title_metadata = title_metadata.unwrap();
2837            assert_eq!(title_metadata.len(), 1);
2838            assert_eq!(title_metadata[0].refined.len(), 1);
2839            assert_eq!(title_metadata[0].refined[0].property, "title-type");
2840            assert_eq!(title_metadata[0].refined[0].value, "main");
2841
2842            let sheet = doc.get_metadata_sheet();
2843            assert_eq!(sheet.title.len(), 1);
2844            assert_eq!(sheet.title[0], "EPUB 3.3");
2845        }
2846
2847        #[test]
2848        fn test_get_metadata_sheet_ignores_unknown_properties() {
2849            let epub_file = Path::new("./test_case/epub-33.epub");
2850            let doc = EpubDoc::new(epub_file);
2851            assert!(doc.is_ok());
2852
2853            let doc = doc.unwrap();
2854            let sheet = doc.get_metadata_sheet();
2855
2856            assert_eq!(sheet.title.len(), 1);
2857            assert_eq!(sheet.creator.len(), 3);
2858            assert_eq!(sheet.subject.len(), 2);
2859        }
2860
2861        #[test]
2862        fn test_get_metadata_sheet_idempotent() {
2863            let epub_file = Path::new("./test_case/epub-33.epub");
2864            let doc = EpubDoc::new(epub_file);
2865            assert!(doc.is_ok());
2866
2867            let doc = doc.unwrap();
2868            let sheet1 = doc.get_metadata_sheet();
2869            let sheet2 = doc.get_metadata_sheet();
2870
2871            assert_eq!(sheet1.title, sheet2.title);
2872            assert_eq!(sheet1.creator, sheet2.creator);
2873            assert_eq!(sheet1.language, sheet2.language);
2874            assert_eq!(sheet1.identifier, sheet2.identifier);
2875            assert_eq!(sheet1.date, sheet2.date);
2876        }
2877    }
2878}