// lib_epub/epub.rs

//! The core module of the EPUB parsing library
//!
//! This module provides complete parsing functionality for EPUB ebook files
//! and is the core component of the entire library. The `EpubDoc` structure
//! encapsulates all the parsing logic and data access interfaces for EPUB files.
//!
//! ## Main references to EPUB specs:
//! - <https://www.w3.org/TR/epub-33>
//! - <https://idpf.org/epub/201>
//!
//! ## Potential Issues
//! - The generic parameter `R: Read + Seek` increases complexity, particularly
//!   in asynchronous environments. The current design is not conducive to multi-threaded
//!   concurrent access and requires an external synchronization mechanism.
//! - Some error handling may not be sufficiently nuanced, and certain edge cases
//!   may not be adequately considered.
//! - Loading the entire EPUB document at once may result in significant memory consumption,
//!   especially for large publications.
//!
//! ## Future Work
//! - Support more EPUB specification features, such as media overlays and scripting.
22
23use std::{
24    collections::HashMap,
25    fs::{self, File},
26    io::{BufReader, Read, Seek},
27    path::{Path, PathBuf},
28    sync::{
29        Arc, Mutex,
30        atomic::{AtomicUsize, Ordering},
31    },
32};
33
34use log::warn;
35use zip::{ZipArchive, result::ZipError};
36
37use crate::{
38    error::EpubError,
39    types::{
40        EncryptionData, EpubVersion, ManifestItem, MetadataItem, MetadataLinkItem,
41        MetadataRefinement, NavPoint, SpineItem,
42    },
43    utils::{
44        DecodeBytes, NormalizeWhitespace, XmlElement, XmlReader, adobe_font_dencryption,
45        check_realtive_link_leakage, compression_method_check, get_file_in_zip_archive,
46        idpf_font_dencryption,
47    },
48};
49
50/// EPUB document parser, representing a loaded and parsed EPUB publication
51///
52/// The `EpubDoc` structure is the core of the entire EPUB parsing library.
53/// It encapsulates all the parsing logic and data access interfaces for EPUB files.
54/// It is responsible for parsing various components of an EPUB, including metadata,
55/// manifests, reading order, table of contents navigation, and encrypted information,
56/// and provides methods for accessing this data.
57///
58/// Provides a unified data access interface for EPUB files, hiding the underlying
59/// file structure and parsing details. Strictly adheres to the EPUB specification
60/// in implementing the parsing logic to ensure compatibility with the standard.
61///
62/// ## Usage
63///
64/// ```rust
65/// use lib_epub::epub::EpubDoc;
66///
67/// let doc = EpubDoc::new("./test_case/epub-33.epub");
68/// assert!(doc.is_ok());
69/// ```
70///
71/// ## Notes
72/// - The `EpubDoc` structure is thread-safe **if and only if** the structure is immutable.
73/// - The fact that `EpubDoc` is mutable has no practical meaning; modifications
74///   to the structure data are not stored in the epub file.
75pub struct EpubDoc<R: Read + Seek> {
76    /// The structure of the epub file that actually holds it
77    pub(crate) archive: Arc<Mutex<ZipArchive<R>>>,
78
79    /// The path to the target epub file
80    pub(crate) epub_path: PathBuf,
81
82    /// The path to the OPF file
83    pub package_path: PathBuf,
84
85    /// The path to the directory where the opf file is located
86    pub base_path: PathBuf,
87
88    /// The epub version
89    pub version: EpubVersion,
90
91    /// The unique identifier of the epub file
92    ///
93    /// This identifier is the actual value of the unique-identifier attribute of the package.
94    pub unique_identifier: String,
95
96    /// Epub metadata extracted from OPF
97    pub metadata: Vec<MetadataItem>,
98
99    /// Data in metadata that points to external files
100    pub metadata_link: Vec<MetadataLinkItem>,
101
102    /// A list of resources contained inside an epub extracted from OPF
103    ///
104    /// All resources in the epub file are declared here,
105    /// and undeclared resources should not be stored in the epub file and cannot be obtained from it.
106    pub manifest: HashMap<String, ManifestItem>,
107
108    /// Physical reading order of publications extracted from OPF
109    ///
110    /// This attribute declares the order in which multiple files
111    /// containing published content should be displayed.
112    pub spine: Vec<SpineItem>,
113
114    /// The encryption.xml extracted from the META-INF directory
115    pub encryption: Option<Vec<EncryptionData>>,
116
117    /// The navigation data of the epub file
118    pub catalog: Vec<NavPoint>,
119
120    /// The title of the catalog
121    pub catalog_title: String,
122
123    /// The index of the current reading spine
124    current_spine_index: AtomicUsize,
125
126    /// Whether the epub file contains encryption information
127    has_encryption: bool,
128}
129
130impl<R: Read + Seek> EpubDoc<R> {
131    /// Creates a new EPUB document instance from a reader
132    ///
133    /// This function is responsible for the core logic of parsing EPUB files,
134    /// including verifying the file format, parsing container information,
135    /// loading the OPF package document, and extracting metadata, manifest,
136    /// reading order, and other core information.
137    ///
138    /// ## Parameters
139    /// - `reader`: The data source that implements the `Read` and `Seek` traits,
140    ///   usually a file or memory buffer
141    /// - `epub_path`: The path to the EPUB file, used for path resolution and validation
142    ///
143    /// ## Return
144    /// - `Ok(EpubDoc<R>)`: The successfully parsed EPUB document object
145    /// - `Err(EpubError)`: Errors encountered during parsing
146    ///
147    /// ## Notes
148    /// - This function assumes the EPUB file structure is valid
149    pub fn from_reader(reader: R, epub_path: PathBuf) -> Result<Self, EpubError> {
150        // Parsing process
151        // 1. Verify that the ZIP compression method conforms to the EPUB specification
152        // 2. Parse `META-INF/container.xml` retrieves the location of the OPF file
153        // 3. Parses the OPF file to obtain package documentation information
154        // 4. Extracts version information
155        // 5. Parses metadata, manifest, and spine
156        // 6. Parses encrypted information and directory navigation
157        // 7. Verifies and extracts the unique identifier
158
159        let mut archive = ZipArchive::new(reader).map_err(EpubError::from)?;
160        let epub_path = fs::canonicalize(epub_path)?;
161
162        compression_method_check(&mut archive)?;
163
164        let container =
165            get_file_in_zip_archive(&mut archive, "META-INF/container.xml")?.decode()?;
166        let package_path = Self::parse_container(container)?;
167        let base_path = package_path
168            .parent()
169            .expect("所有文件的父目录不能为空")
170            .to_path_buf();
171
172        let opf_file =
173            get_file_in_zip_archive(&mut archive, package_path.to_str().unwrap())?.decode()?;
174        let package = XmlReader::parse(&opf_file)?;
175
176        let version = Self::determine_epub_version(&package)?;
177        let has_encryption = archive
178            .by_path(Path::new("META-INF/encryption.xml"))
179            .is_ok();
180
181        let mut doc = Self {
182            archive: Arc::new(Mutex::new(archive)),
183            epub_path,
184            package_path,
185            base_path,
186            version,
187            unique_identifier: String::new(),
188            metadata: vec![],
189            metadata_link: vec![],
190            manifest: HashMap::new(),
191            spine: vec![],
192            encryption: None,
193            catalog: vec![],
194            catalog_title: String::new(),
195            current_spine_index: AtomicUsize::new(0),
196            has_encryption,
197        };
198
199        let metadata_element = package.find_elements_by_name("metadata").next().unwrap();
200        let manifest_element = package.find_elements_by_name("manifest").next().unwrap();
201        let spine_element = package.find_elements_by_name("spine").next().unwrap();
202
203        doc.parse_metadata(metadata_element)?;
204        doc.parse_manifest(manifest_element)?;
205        doc.parse_spine(spine_element)?;
206        doc.parse_encryption()?;
207        doc.parse_catalog()?;
208
209        // 断言必有唯一标识符
210        doc.unique_identifier = if let Some(uid) = package.get_attr("unique-identifier") {
211            doc.metadata.iter().find(|item| {
212                item.property == "identifier" && item.id.as_ref().is_some_and(|id| id == &uid)
213            })
214        } else {
215            doc.metadata
216                .iter()
217                .find(|item| item.property == "identifier")
218        }
219        .map(|item| item.value.clone())
220        .ok_or_else(|| EpubError::NonCanonicalFile { tag: "dc:identifier".to_string() })?;
221
222        Ok(doc)
223    }
224
225    /// Parse the EPUB container file (META-INF/container.xml)
226    ///
227    /// This function parses the container information in the EPUB file 、
228    /// to extract the path to the OPF package file. According to the EPUB
229    /// specification, the `container.xml` file must exist in the `META-INF`
230    /// directory and contain at least one `rootfile` element pointing to
231    /// the main OPF file. When multiple `rootfile` elements exist, the first
232    /// element pointing to the OPF file is used as the default.
233    ///
234    /// ## Parameters
235    /// - `data`: The content string of the container.xml
236    ///
237    /// ## Return
238    /// - `Ok(PathBuf)`: The path to the successfully parsed OPF file
239    /// - `Err(EpubError)`: Errors encountered during parsing
240    fn parse_container(data: String) -> Result<PathBuf, EpubError> {
241        let root = XmlReader::parse(&data)?;
242        let rootfile = root
243            .find_elements_by_name("rootfile")
244            .next()
245            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "rootfile".to_string() })?;
246
247        let attr =
248            rootfile
249                .get_attr("full-path")
250                .ok_or_else(|| EpubError::MissingRequiredAttribute {
251                    tag: "rootfile".to_string(),
252                    attribute: "full-path".to_string(),
253                })?;
254
255        Ok(PathBuf::from(attr))
256    }
257
258    /// Parse the EPUB metadata section
259    ///
260    /// This function is responsible for parsing the `<metadata>` elements
261    /// in the OPF file to extract basic information about the publication.
262    /// It handles metadata elements from different namespaces:
263    /// - Elements in the Dublin Core namespace (`http://purl.org/dc/elements/1.1/`)
264    /// - Elements in the OPF namespace (`http://www.idpf.org/2007/opf`)
265    ///
266    /// ## Parameters
267    /// - `metadata_element`: A reference to the `<metadata>` element in the OPF file
268    fn parse_metadata(&mut self, metadata_element: &XmlElement) -> Result<(), EpubError> {
269        const DC_NAMESPACE: &str = "http://purl.org/dc/elements/1.1/";
270        const OPF_NAMESPACE: &str = "http://www.idpf.org/2007/opf";
271
272        let mut metadata = Vec::new();
273        let mut metadata_link = Vec::new();
274        let mut refinements = HashMap::<String, Vec<MetadataRefinement>>::new();
275
276        for element in metadata_element.children() {
277            match &element.namespace {
278                Some(namespace) if namespace == DC_NAMESPACE => {
279                    self.parse_dc_metadata(element, &mut metadata)?
280                }
281
282                Some(namespace) if namespace == OPF_NAMESPACE => self.parse_opf_metadata(
283                    element,
284                    &mut metadata,
285                    &mut metadata_link,
286                    &mut refinements,
287                )?,
288
289                _ => {}
290            };
291        }
292
293        for item in metadata.iter_mut() {
294            if let Some(id) = &item.id {
295                if let Some(refinements) = refinements.remove(id) {
296                    item.refined = refinements;
297                }
298            }
299        }
300
301        self.metadata = metadata;
302        self.metadata_link = metadata_link;
303        Ok(())
304    }
305
306    /// Parse the EPUB manifest section
307    ///
308    /// This function parses the `<manifest>` element in the OPF file, extracting
309    /// information about all resource files in the publication. Each resource contains
310    /// basic information such as id, file path, MIME type, as well as optional
311    /// attributes and fallback resource information.
312    ///
313    /// ## Parameters
314    /// - `manifest_element`: A reference to the `<manifest>` element in the OPF file
315    fn parse_manifest(&mut self, manifest_element: &XmlElement) -> Result<(), EpubError> {
316        let estimated_items = manifest_element.children().count();
317        let mut resources = HashMap::with_capacity(estimated_items);
318
319        for element in manifest_element.children() {
320            let id = element
321                .get_attr("id")
322                .ok_or_else(|| EpubError::MissingRequiredAttribute {
323                    tag: element.tag_name(),
324                    attribute: "id".to_string(),
325                })?
326                .to_string();
327            let path = element
328                .get_attr("href")
329                .ok_or_else(|| EpubError::MissingRequiredAttribute {
330                    tag: element.tag_name(),
331                    attribute: "href".to_string(),
332                })?
333                .to_string();
334            let mime = element
335                .get_attr("media-type")
336                .ok_or_else(|| EpubError::MissingRequiredAttribute {
337                    tag: element.tag_name(),
338                    attribute: "media-type".to_string(),
339                })?
340                .to_string();
341            let properties = element.get_attr("properties");
342            let fallback = element.get_attr("fallback");
343
344            resources.insert(
345                id.clone(),
346                ManifestItem {
347                    id,
348                    path: self.normalize_manifest_path(&path)?,
349                    mime,
350                    properties,
351                    fallback,
352                },
353            );
354        }
355
356        self.manifest = resources;
357        self.validate_fallback_chains();
358        Ok(())
359    }
360
361    /// Parse the EPUB spine section
362    ///
363    /// This function parses the `<spine>` elements in the OPF file to extract
364    /// the reading order information of the publication. The spine defines the
365    /// linear reading order of the publication's content documents, and each
366    /// spine item references resources in the manifest.
367    ///
368    /// ## Parameters
369    /// - `spine_element`: A reference to the `<spine>` element in the OPF file
370    fn parse_spine(&mut self, spine_element: &XmlElement) -> Result<(), EpubError> {
371        let mut spine = Vec::new();
372        for element in spine_element.children() {
373            let idref = element
374                .get_attr("idref")
375                .ok_or_else(|| EpubError::MissingRequiredAttribute {
376                    tag: element.tag_name(),
377                    attribute: "idref".to_string(),
378                })?
379                .to_string();
380            let id = element.get_attr("id");
381            let linear = element
382                .get_attr("linear")
383                .map(|linear| linear == "yes")
384                .unwrap_or(true);
385            let properties = element.get_attr("properties");
386
387            spine.push(SpineItem { idref, id, linear, properties });
388        }
389
390        self.spine = spine;
391        Ok(())
392    }
393
394    /// Parse the EPUB encryption file (META-INF/encryption.xml)
395    ///
396    /// This function is responsible for parsing the `encryption.xml` file
397    /// in the `META-INF` directory to extract information about encrypted
398    /// resources in the publication. According to the EPUB specification,
399    /// the encryption information describes which resources are encrypted
400    /// and the encryption methods used.
401    ///
402    /// TODO: 需要对使用非对称加密数据的加密项进行额外处理,以获取非对称加密密钥
403    fn parse_encryption(&mut self) -> Result<(), EpubError> {
404        if !self.has_encryption() {
405            return Ok(());
406        }
407
408        let mut archive = self.archive.lock()?;
409        let encryption_file =
410            get_file_in_zip_archive(&mut archive, "META-INF/encryption.xml")?.decode()?;
411
412        let root = XmlReader::parse(&encryption_file)?;
413
414        let mut encryption_data = Vec::new();
415        for data in root.children() {
416            if data.name != "EncryptedData" {
417                continue;
418            }
419
420            let method = data
421                .find_elements_by_name("EncryptionMethod")
422                .next()
423                .ok_or_else(|| EpubError::NonCanonicalFile {
424                    tag: "EncryptionMethod".to_string(),
425                })?;
426            let reference = data
427                .find_elements_by_name("CipherReference")
428                .next()
429                .ok_or_else(|| EpubError::NonCanonicalFile {
430                    tag: "CipherReference".to_string(),
431                })?;
432
433            encryption_data.push(EncryptionData {
434                method: method
435                    .get_attr("Algorithm")
436                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
437                        tag: "EncryptionMethod".to_string(),
438                        attribute: "Algorithm".to_string(),
439                    })?
440                    .to_string(),
441                data: reference
442                    .get_attr("URI")
443                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
444                        tag: "CipherReference".to_string(),
445                        attribute: "URI".to_string(),
446                    })?
447                    .to_string(),
448            });
449        }
450
451        if !encryption_data.is_empty() {
452            self.encryption = Some(encryption_data);
453        }
454
455        Ok(())
456    }
457
458    /// Parse the EPUB navigation information
459    ///
460    /// This function is responsible for parsing the navigation information of EPUB
461    /// publications. Different parsing strategies are used depending on the EPUB version:
462    /// - EPUB 2.0: Parses the NCX file to obtain directory information
463    /// - EPUB 3.0: Parses the Navigation Document (NAV) file to obtain directory information
464    fn parse_catalog(&mut self) -> Result<(), EpubError> {
465        const HEAD_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];
466
467        let mut archive = self.archive.lock()?;
468        match self.version {
469            EpubVersion::Version2_0 => {
470                let opf_file =
471                    get_file_in_zip_archive(&mut archive, self.package_path.to_str().unwrap())?
472                        .decode()?;
473                let opf_element = XmlReader::parse(&opf_file)?;
474
475                let toc_id = opf_element
476                    .find_children_by_name("spine")
477                    .next()
478                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?
479                    .get_attr("toc")
480                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
481                        tag: "spine".to_string(),
482                        attribute: "toc".to_string(),
483                    })?
484                    .to_owned();
485                let toc_path = self
486                    .manifest
487                    .get(&toc_id)
488                    .ok_or(EpubError::ResourceIdNotExist { id: toc_id })?
489                    .path
490                    .to_str()
491                    .unwrap();
492
493                let ncx_file = get_file_in_zip_archive(&mut archive, toc_path)?.decode()?;
494                let ncx = XmlReader::parse(&ncx_file)?;
495
496                match ncx.find_elements_by_name("docTitle").next() {
497                    Some(element) => self.catalog_title = element.text(),
498                    None => warn!(
499                        "Expecting to get docTitle information from the ncx file, but it's missing."
500                    ),
501                };
502
503                let nav_map = ncx
504                    .find_elements_by_name("navMap")
505                    .next()
506                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "navMap".to_string() })?;
507
508                self.catalog = self.parse_nav_points(nav_map)?;
509
510                Ok(())
511            }
512
513            EpubVersion::Version3_0 => {
514                let nav_path = self
515                    .manifest
516                    .values()
517                    .find(|item| {
518                        if let Some(property) = &item.properties {
519                            return property.contains("nav");
520                        }
521                        false
522                    })
523                    .map(|item| item.path.clone())
524                    .ok_or_else(|| EpubError::NonCanonicalEpub {
525                        expected_file: "Navigation Document".to_string(),
526                    })?;
527
528                let nav_file =
529                    get_file_in_zip_archive(&mut archive, nav_path.to_str().unwrap())?.decode()?;
530
531                let nav_element = XmlReader::parse(&nav_file)?;
532                let nav = nav_element
533                    .find_elements_by_name("nav")
534                    .find(|&element| element.get_attr("epub:type") == Some(String::from("toc")))
535                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "nav".to_string() })?;
536                let nav_title = nav.find_children_by_names(&HEAD_TAGS).next();
537                let nav_list = nav
538                    .find_children_by_name("ol")
539                    .next()
540                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "ol".to_string() })?;
541
542                self.catalog = self.parse_catalog_list(nav_list)?;
543                if let Some(nav_title) = nav_title {
544                    self.catalog_title = nav_title.text();
545                };
546                Ok(())
547            }
548        }
549    }
550
551    /// Check if the EPUB file contains `encryption.xml`
552    ///
553    /// This function determines whether a publication contains encrypted resources
554    /// by checking if a `META-INF/encryption.xml` file exists in the EPUB package.
555    /// According to the EPUB specification, when resources in a publication are
556    /// encrypted, the corresponding encryption information must be declared in
557    /// the `META-INF/encryption.xml` file.
558    ///
559    /// ## Return
560    /// - `true` if the publication contains encrypted resources
561    /// - `false` if the publication does not contain encrypted resources
562    ///
563    /// ## Notes
564    /// - This function only checks the existence of the encrypted file;
565    ///   it does not verify the validity of the encrypted information.
566    pub fn has_encryption(&self) -> bool {
567        self.has_encryption
568    }
569
570    /// Retrieves a list of metadata items
571    ///
572    /// This function retrieves all matching metadata items from the EPUB metadata
573    /// based on the specified attribute name (key). Metadata items may come from
574    /// the DC (Dublin Core) namespace or the OPF namespace and contain basic
575    /// information about the publication, such as title, author, identifier, etc.
576    ///
577    /// ## Parameters
578    /// - `key`: The name of the metadata attribute to retrieve
579    ///
580    /// ## Return
581    /// - `Some(Vec<MetadataItem>)`: A vector containing all matching metadata items
582    /// - `None`: If no matching metadata items are found
583    pub fn get_metadata(&self, key: &str) -> Option<Vec<MetadataItem>> {
584        let metadatas = self
585            .metadata
586            .iter()
587            .filter(|item| item.property == key)
588            .cloned()
589            .collect::<Vec<MetadataItem>>();
590
591        (!metadatas.is_empty()).then_some(metadatas)
592    }
593
594    /// Retrieves a list of values for specific metadata items
595    ///
596    /// This function retrieves the values ​​of all matching metadata items from
597    /// the EPUB metadata based on the given property name (key).
598    ///
599    /// ## Parameters
600    /// - `key`: The name of the metadata attribute to retrieve
601    ///
602    /// ## Return
603    /// - `Some(Vec<String>)`: A vector containing all matching metadata item values
604    /// - `None`: If no matching metadata items are found
605    pub fn get_metadata_value(&self, key: &str) -> Option<Vec<String>> {
606        let values = self
607            .metadata
608            .iter()
609            .filter(|item| item.property == key)
610            .map(|item| item.value.clone())
611            .collect::<Vec<String>>();
612
613        (!values.is_empty()).then_some(values)
614    }
615
616    /// Retrieves the title of the publication
617    ///
618    /// This function retrieves all title information from the EPUB metadata.
619    /// According to the EPUB specification, a publication can have multiple titles,
620    /// which are returned in the order they appear in the metadata.
621    ///
622    /// ## Return
623    /// - `Result<Vec<String>, EpubError>`: A vector containing all title information
624    /// - `EpubError`: If and only if the OPF file does not contain `<dc:title>`
625    ///
626    /// ## Notes
627    /// - The EPUB specification requires each publication to have at least one title.
628    pub fn get_title(&self) -> Result<Vec<String>, EpubError> {
629        self.get_metadata_value("title")
630            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "title".to_string() })
631    }
632
633    /// Retrieves the language used in the publication
634    ///
635    /// This function retrieves the language information of a publication from the EPUB
636    /// metadata. According to the EPUB specification, language information identifies
637    /// the primary language of the publication and can have multiple language identifiers.
638    ///
639    /// ## Return
640    /// - `Ok(Vec<String>)`: A vector containing all language identifiers
641    /// - `Err(EpubError)`: If and only if the OPF file does not contain `<dc:language>`
642    ///
643    /// ## Notes
644    /// - The EPUB specification requires that each publication specify at least one primary language.
645    /// - Language identifiers should conform to RFC 3066 or later standards.
646    pub fn get_language(&self) -> Result<Vec<String>, EpubError> {
647        self.get_metadata_value("language")
648            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "language".to_string() })
649    }
650
651    /// Retrieves the identifier of a publication
652    ///
653    /// This function retrieves the identifier information of a publication from
654    /// the EPUB metadata. According to the EPUB specification, each publication
655    /// must have a identifier, typically an ISBN, UUID, or other unique identifier.
656    ///
657    /// ## Return
658    /// - `Ok(Vec<String>)`: A vector containing all identifier information
659    /// - `Err(EpubError)`: If and only if the OPF file does not contain `<dc:identifier>`
660    ///
661    /// ## Notes
662    /// - The EPUB specification requires each publication to have at least one identifier.
663    /// - In the OPF file, the `unique-identifier` attribute of the `<package>` element
664    ///   should point to a `<dc:identifier>` element used to uniquely identify the publication.
665    ///   This means that `unique-identifier` is not exactly equal to `<dc:identifier>`.
666    pub fn get_identifier(&self) -> Result<Vec<String>, EpubError> {
667        self.get_metadata_value("identifier")
668            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "identifier".to_string() })
669    }
670
671    /// Retrieve resource data by resource ID
672    ///
673    /// This function will find the resource with the specified ID in the manifest.
674    /// If the resource is encrypted, it will be automatically decrypted.
675    ///
676    /// ## Parameters
677    /// - `id`: The ID of the resource to retrieve
678    ///
679    /// ## Return
680    /// - `Ok((Vec<u8>, String))`: Successfully retrieved and decrypted resource data and
681    ///   the MIME type
682    /// - `Err(EpubError)`: Errors that occurred during the retrieval process
683    ///
684    /// ## Notes
685    /// - This function will automatically decrypt the resource if it is encrypted.
686    /// - For unsupported encryption methods, the corresponding error will be returned.
687    pub fn get_manifest_item(&self, id: &str) -> Result<(Vec<u8>, String), EpubError> {
688        let resource_item = self
689            .manifest
690            .get(id)
691            .cloned()
692            .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
693
694        let path = resource_item.path.to_str().unwrap();
695
696        let mut archive = self.archive.lock()?;
697        let mut data = match archive.by_name(path) {
698            Ok(mut file) => {
699                let mut entry = Vec::<u8>::new();
700                file.read_to_end(&mut entry)?;
701
702                Ok(entry)
703            }
704            Err(ZipError::FileNotFound) => {
705                Err(EpubError::ResourceNotFound { resource: path.to_string() })
706            }
707            Err(err) => Err(EpubError::from(err)),
708        }?;
709
710        if let Some(method) = self.is_encryption_file(path) {
711            data = self.auto_dencrypt(&method, &mut data)?;
712        }
713
714        Ok((data, resource_item.mime))
715    }
716
717    /// Retrieves resource item data by resource path
718    ///
719    /// This function retrieves resources from the manifest based on the input path.
720    /// The input path must be a relative path to the root directory of the EPUB container;
721    /// using an absolute path or a relative path to another location will result in an error.
722    ///
723    /// ## Parameters
724    /// - `path`: The path of the resource to retrieve
725    ///
726    /// ## Return
727    /// - `Ok((Vec<u8>, String))`: Successfully retrieved and decrypted resource data and
728    ///   the MIME type
729    /// - `Err(EpubError)`: Errors that occurred during the retrieval process
730    ///
731    /// ## Notes
732    /// - This function will automatically decrypt the resource if it is encrypted.
733    /// - For unsupported encryption methods, the corresponding error will be returned.
734    /// - Relative paths other than the root directory of the Epub container are not supported.
735    pub fn get_manifest_item_by_path(&self, path: &str) -> Result<(Vec<u8>, String), EpubError> {
736        let id = self
737            .manifest
738            .iter()
739            .find(|(_, item)| item.path.to_str().unwrap() == path)
740            .map(|(id, _)| id.to_string())
741            .ok_or_else(|| EpubError::ResourceNotFound { resource: path.to_string() })?;
742
743        self.get_manifest_item(&id)
744    }
745
746    /// Retrieves supported resource items by resource ID, with fallback mechanism supported
747    ///
748    /// This function attempts to retrieve the resource item with the specified ID and
749    /// checks if its MIME type is in the list of supported formats. If the current resource
750    /// format is not supported, it searches for a supported resource format along the
751    /// fallback chain according to the fallback mechanism defined in the EPUB specification.
752    ///
753    /// ## Parameters
754    /// - `id`: The ID of the resource to retrieve
755    /// - `supported_format`: A vector of supported MIME types
756    ///
757    /// ## Return
758    /// - `Ok((Vec<u8>, String))`: Successfully retrieved and decrypted resource data and
759    ///   the MIME type
760    /// - `Err(EpubError)`: Errors that occurred during the retrieval process
761    pub fn get_manifest_item_with_fallback(
762        &self,
763        id: &str,
764        supported_format: Vec<&str>,
765    ) -> Result<(Vec<u8>, String), EpubError> {
766        let mut manifest_item = self
767            .manifest
768            .get(id)
769            .cloned()
770            .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
771
772        let mut current_manifest_id = id.to_string();
773        let mut fallback_chain = Vec::<String>::new();
774        'fallback: loop {
775            if supported_format.contains(&manifest_item.mime.as_str()) {
776                return self.get_manifest_item(&current_manifest_id);
777            }
778
779            let fallback_id = manifest_item.fallback.clone();
780
781            match fallback_id {
782                // The loop ends when no fallback resource exists
783                None => break 'fallback,
784
785                // End the loop when the loop continues to fallback if a fallback resource exists
786                Some(id) if fallback_chain.contains(&id) => break 'fallback,
787
788                Some(id) => {
789                    fallback_chain.push(id.clone());
790
791                    // Since only warnings are issued for fallback resource checks
792                    // during initialization, the issue of fallback resources possibly
793                    // not existing needs to be handled here.
794                    manifest_item = self
795                        .manifest
796                        .get(&manifest_item.fallback.unwrap())
797                        .cloned()
798                        .ok_or(EpubError::ResourceIdNotExist { id: id.clone() })?;
799                    current_manifest_id = id;
800                }
801            };
802        }
803
804        Err(EpubError::NoSupportedFileFormat)
805    }
806
807    /// Retrieves the cover of the EPUB document
808    ///
809    /// This function searches for the cover of the EPUB document by examining manifest
810    /// items in the manifest. It looks for manifest items whose ID or attribute contains
811    /// "cover" (case-insensitive) and attempts to retrieve the content of the first match.
812    ///
813    /// ## Return
814    /// - `Some((Vec<u8>, String))`: Successfully retrieved and decrypted cover data and
815    ///   the MIME type
816    /// - `None`: No cover resource was found
817    ///
818    /// ## Notes
819    /// - This function only returns the first successfully retrieved cover resource,
820    ///   even if multiple matches exist
821    /// - The retrieved cover may not be an image resource; users need to pay attention
822    ///   to the resource's MIME type.
823    pub fn get_cover(&self) -> Option<(Vec<u8>, String)> {
824        self.manifest
825            .values()
826            .filter_map(|manifest| {
827                if manifest.id.to_ascii_lowercase().contains("cover") {
828                    return Some(manifest.id.clone());
829                }
830
831                if let Some(properties) = &manifest.properties {
832                    if properties.to_ascii_lowercase().contains("cover") {
833                        return Some(manifest.id.clone());
834                    }
835                }
836
837                None
838            })
839            .collect::<Vec<String>>()
840            .iter()
841            .find_map(|id| self.get_manifest_item(id).ok())
842    }
843
844    /// Navigate to a specified chapter using the spine index
845    ///
846    /// This function retrieves the content data of the corresponding chapter based
847    /// on the index position in the EPUB spine. The spine defines the linear reading
848    /// order of the publication's content documents, and each spine item references
849    /// resources in the manifest.
850    ///
851    /// ## Parameters
852    /// - `index`: The index position in the spine, starting from 0
853    ///
854    /// ## Return
855    /// - `Some((Vec<u8>, String))`: Successfully retrieved chapter content data and the MIME type
856    /// - `None`: Index out of range or data retrieval error
857    ///
858    /// ## Notes
859    /// - The index must be less than the total number of spine projects.
860    /// - If the resource is encrypted, it will be automatically decrypted before returning.(TODO)
861    /// - It does not check whether the Spine project follows a linear reading order.
862    pub fn navigate_by_spine_index(&mut self, index: usize) -> Option<(Vec<u8>, String)> {
863        if index >= self.spine.len() {
864            return None;
865        }
866
867        let manifest_id = self.spine[index].idref.clone();
868        self.current_spine_index.store(index, Ordering::SeqCst);
869        self.get_manifest_item(&manifest_id).ok()
870    }
871
872    /// Navigate to the previous linear reading chapter
873    ///
874    /// This function searches backwards in the EPUB spine for the previous linear
875    /// reading chapter and returns the content data of that chapter. It only navigates
876    /// to chapters marked as linear reading.
877    ///
878    /// ## Return
879    /// - `Some((Vec<u8>, String))`: Successfully retrieved previous chapter content data and
880    ///   the MIME type
881    /// - `None`: Already in the first chapter, the current chapter is not linear,
882    ///   or data retrieval failed
883    pub fn spine_prev(&self) -> Option<(Vec<u8>, String)> {
884        let current_index = self.current_spine_index.load(Ordering::SeqCst);
885        if current_index == 0 || !self.spine[current_index].linear {
886            return None;
887        }
888
889        let prev_index = (0..current_index)
890            .rev()
891            .find(|&index| self.spine[index].linear)?;
892
893        self.current_spine_index.store(prev_index, Ordering::SeqCst);
894        let manifest_id = self.spine[prev_index].idref.clone();
895        self.get_manifest_item(&manifest_id).ok()
896    }
897
898    /// Navigate to the next linear reading chapter
899    ///
900    /// This function searches forwards in the EPUB spine for the next linear reading
901    /// chapter and returns the content data of that chapter. It only navigates to
902    /// chapters marked as linear reading.
903    ///
904    /// ## Return
905    /// - `Some((Vec<u8>, String))`: Successfully retrieved next chapter content data and
906    ///   the MIME type
907    /// - `None`: Already in the last chapter, the current chapter is not linear,
908    ///   or data retrieval failed
909    pub fn spine_next(&mut self) -> Option<(Vec<u8>, String)> {
910        let current_index = self.current_spine_index.load(Ordering::SeqCst);
911        if current_index >= self.spine.len() - 1 || !self.spine[current_index].linear {
912            return None;
913        }
914
915        let next_index =
916            (current_index + 1..self.spine.len()).find(|&index| self.spine[index].linear)?;
917
918        self.current_spine_index.store(next_index, Ordering::SeqCst);
919        let manifest_id = self.spine[next_index].idref.clone();
920        self.get_manifest_item(&manifest_id).ok()
921    }
922
923    /// Retrieves the content data of the current chapter
924    ///
925    /// This function returns the content data of the chapter at the current
926    /// index position in the EPUB spine.
927    ///
928    /// ## Return
929    /// - `Some((Vec<u8>, String))`: Successfully retrieved current chapter content data and
930    ///   the MIME type
931    /// - `None`: Data retrieval failed
932    pub fn spine_current(&self) -> Option<(Vec<u8>, String)> {
933        let manifest_id = self.spine[self.current_spine_index.load(Ordering::SeqCst)]
934            .idref
935            .clone();
936        self.get_manifest_item(&manifest_id).ok()
937    }
938
939    /// Determine the EPUB version from the OPF file
940    ///
941    /// This function is used to detect the version of an epub file from an OPF file.
942    /// When the version attribute in the package is abnormal, version information will
943    /// be identified through some version characteristics of the epub file. An error is
944    /// returned when neither direct nor indirect methods can identify the version.
945    ///
946    /// ## Parameters
947    /// - `opf_element`: A reference to the OPF file element
948    fn determine_epub_version(opf_element: &XmlElement) -> Result<EpubVersion, EpubError> {
949        // Check the explicit version attribute
950        if let Some(version) = opf_element.get_attr("version") {
951            match version.as_str() {
952                "2.0" => return Ok(EpubVersion::Version2_0),
953                "3.0" => return Ok(EpubVersion::Version3_0),
954                _ => {}
955            }
956        }
957
958        let spine_element = opf_element
959            .find_elements_by_name("spine")
960            .next()
961            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?;
962
963        // Look for EPUB 2.x specific features
964        if spine_element.get_attr("toc").is_some() {
965            return Ok(EpubVersion::Version2_0);
966        }
967
968        let manifest_element = opf_element
969            .find_elements_by_name("manifest")
970            .next()
971            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "manifest".to_string() })?;
972
973        // Look for EPUB 3.x specific features
974        manifest_element
975            .children()
976            .find_map(|element| {
977                if let Some(id) = element.get_attr("id") {
978                    if id.eq("nav") {
979                        return Some(EpubVersion::Version3_0);
980                    }
981                }
982
983                None
984            })
985            .ok_or(EpubError::UnrecognizedEpubVersion)
986    }
987
988    /// Parse metadata elements under the Dublin Core namespace
989    ///
990    /// This function handles the `<metadata>` Dublin Core element in the OPF file (namespace
991    /// is "http://purl.org/dc/elements/1.1/"). These elements usually contain the basic
992    /// information of the publication, such as title, author, publication date, etc.
993    ///
994    /// ## Notes
995    /// - In EPUB 3.0, granular information is handled by separate '<meta>' elements and 'refines' attributes
996    /// - All text content is normalized by whitespace
997    #[inline]
998    fn parse_dc_metadata(
999        &self,
1000        element: &XmlElement,
1001        metadata: &mut Vec<MetadataItem>,
1002        // refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1003    ) -> Result<(), EpubError> {
1004        let id = element.get_attr("id");
1005        let lang = element.get_attr("lang");
1006        let property = element.name.clone();
1007        let value = element.text().normalize_whitespace();
1008
1009        let refined = match self.version {
1010            // In EPUB 2.0, supplementary metadata (refinements) are represented
1011            // through other attribute data pairs of the tag.
1012            EpubVersion::Version2_0 => element
1013                .attributes
1014                .iter()
1015                .map(|(name, value)| {
1016                    let property = name.to_string();
1017                    let value = value.to_string().normalize_whitespace();
1018
1019                    MetadataRefinement {
1020                        refines: id.clone().unwrap(),
1021                        property,
1022                        value,
1023                        lang: None,
1024                        scheme: None,
1025                    }
1026                })
1027                .collect(),
1028            EpubVersion::Version3_0 => vec![],
1029        };
1030
1031        metadata.push(MetadataItem { id, property, value, lang, refined });
1032
1033        Ok(())
1034    }
1035
1036    /// Parse metadata elements under the OPF namespace
1037    ///
1038    /// This function handles the `<metadata>` OPF element in the OPF file (namespace
1039    /// is "http://www.idpf.org/2007/opf"). These elements include '<meta>' and '<link>',
1040    /// which are used to provide extended metadata and links to external resources for EPUB publications.
1041    ///
1042    /// ## Notes
1043    /// - The function is only responsible for distribution processing, and the
1044    ///   specific parsing logic is implemented in the dedicated function
1045    /// - All parsing results are added directly to the incoming collection and no new collection is returned
1046    #[inline]
1047    fn parse_opf_metadata(
1048        &self,
1049        element: &XmlElement,
1050        metadata: &mut Vec<MetadataItem>,
1051        metadata_link: &mut Vec<MetadataLinkItem>,
1052        refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1053    ) -> Result<(), EpubError> {
1054        match element.name.as_str() {
1055            "meta" => self.parse_meta_element(element, metadata, refinements),
1056            "link" => self.parse_link_element(element, metadata_link),
1057            _ => Ok(()),
1058        }
1059    }
1060
1061    #[inline]
1062    fn parse_meta_element(
1063        &self,
1064        element: &XmlElement,
1065        metadata: &mut Vec<MetadataItem>,
1066        refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1067    ) -> Result<(), EpubError> {
1068        match self.version {
1069            EpubVersion::Version2_0 => {
1070                let property = element
1071                    .get_attr("name")
1072                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: element.tag_name() })?;
1073                let value = element
1074                    .get_attr("content")
1075                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
1076                        tag: element.tag_name(),
1077                        attribute: "content".to_string(),
1078                    })?
1079                    .normalize_whitespace();
1080
1081                metadata.push(MetadataItem {
1082                    id: None,
1083                    property,
1084                    value,
1085                    lang: None,
1086                    refined: vec![],
1087                });
1088            }
1089
1090            EpubVersion::Version3_0 => {
1091                let property = element.get_attr("property").ok_or_else(|| {
1092                    EpubError::MissingRequiredAttribute {
1093                        tag: element.tag_name(),
1094                        attribute: "property".to_string(),
1095                    }
1096                })?;
1097                let value = element.text().normalize_whitespace();
1098                let lang = element.get_attr("lang");
1099
1100                if let Some(refines) = element.get_attr("refines") {
1101                    let id = refines.strip_prefix("#").unwrap_or(&refines).to_string();
1102                    let scheme = element.get_attr("scheme");
1103                    let refinement = MetadataRefinement {
1104                        refines: id.clone(),
1105                        property,
1106                        value,
1107                        lang,
1108                        scheme,
1109                    };
1110
1111                    if let Some(refinements) = refinements.get_mut(&id) {
1112                        refinements.push(refinement);
1113                    } else {
1114                        refinements.insert(id, vec![refinement]);
1115                    }
1116                } else {
1117                    let id = element.get_attr("id");
1118                    let item = MetadataItem {
1119                        id,
1120                        property,
1121                        value,
1122                        lang,
1123                        refined: vec![],
1124                    };
1125
1126                    metadata.push(item);
1127                };
1128            }
1129        }
1130        Ok(())
1131    }
1132
1133    #[inline]
1134    fn parse_link_element(
1135        &self,
1136        element: &XmlElement,
1137        metadata_link: &mut Vec<MetadataLinkItem>,
1138    ) -> Result<(), EpubError> {
1139        let href = element
1140            .get_attr("href")
1141            .ok_or_else(|| EpubError::MissingRequiredAttribute {
1142                tag: element.tag_name(),
1143                attribute: "href".to_string(),
1144            })?;
1145        let rel = element
1146            .get_attr("rel")
1147            .ok_or_else(|| EpubError::MissingRequiredAttribute {
1148                tag: element.tag_name(),
1149                attribute: "rel".to_string(),
1150            })?;
1151        let hreflang = element.get_attr("hreflang");
1152        let id = element.get_attr("id");
1153        let mime = element.get_attr("media-type");
1154        let properties = element.get_attr("properties");
1155
1156        metadata_link.push(MetadataLinkItem {
1157            href,
1158            rel,
1159            hreflang,
1160            id,
1161            mime,
1162            properties,
1163            refines: None,
1164        });
1165        Ok(())
1166    }
1167
1168    /// Recursively parse NCX navigation points from navMap or nested navPoint elements
1169    ///
1170    /// This function parses the hierarchical navigation structure defined in NCX files
1171    /// for EPUB 2.x documents. It handles nested navPoint elements to build a complete
1172    /// tree representation of the publication's table of contents.
1173    fn parse_nav_points(&self, parent_element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1174        let mut nav_points = Vec::new();
1175        for nav_point in parent_element.find_children_by_name("navPoint") {
1176            let label = match nav_point.find_children_by_name("navLabel").next() {
1177                Some(element) => element.text(),
1178                None => String::new(),
1179            };
1180
1181            let content = nav_point
1182                .find_children_by_name("content")
1183                .next()
1184                .map(|element| PathBuf::from(element.text()));
1185
1186            let play_order = nav_point
1187                .get_attr("playOrder")
1188                .and_then(|order| order.parse::<usize>().ok());
1189
1190            let children = self.parse_nav_points(nav_point)?;
1191
1192            nav_points.push(NavPoint { label, content, play_order, children });
1193        }
1194
1195        nav_points.sort();
1196        Ok(nav_points)
1197    }
1198
1199    /// Recursively parses directory list structures
1200    ///
1201    /// This function recursively parses HTML navigation list structures,
1202    /// converting `<ol>` and `<li>` elements into NavPoint structures.
1203    /// Multi-level nested directory structures are supported.
1204    fn parse_catalog_list(&self, element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1205        let mut catalog = Vec::new();
1206        for item in element.children() {
1207            if item.tag_name() != "li" {
1208                return Err(EpubError::NonCanonicalFile { tag: "li".to_string() });
1209            }
1210
1211            let title_element = item
1212                .find_children_by_names(&["span", "a"])
1213                .next()
1214                .ok_or_else(|| EpubError::NonCanonicalFile { tag: "span/a".to_string() })?;
1215            let content_href = title_element.get_attr("href").map(PathBuf::from);
1216            let sub_list = if let Some(list) = item.find_children_by_name("ol").next() {
1217                self.parse_catalog_list(list)?
1218            } else {
1219                vec![]
1220            };
1221
1222            catalog.push(NavPoint {
1223                label: title_element.text(),
1224                content: content_href,
1225                children: sub_list,
1226                play_order: None,
1227            });
1228        }
1229
1230        Ok(catalog)
1231    }
1232
1233    /// Converts relative paths in the manifest to normalized paths
1234    /// relative to the EPUB root directory
1235    ///
1236    /// This function processes the href attribute of resources in the EPUB
1237    /// manifest and converts it to a normalized path representation.
1238    /// It handles three types of paths:
1239    /// - Relative paths starting with `../` (checks if they exceed the EPUB package scope)
1240    /// - Absolute paths starting with `/` (relative to the EPUB root directory)
1241    /// - Other relative paths (relative to the directory containing the OPF file)
1242    ///
1243    /// ## Parameters
1244    /// - `path`: The href attribute value of the resource in the manifest
1245    ///
1246    /// ## Return
1247    /// - `Ok(PathBuf)`: The parsed normalized path
1248    /// - `Err(EpubError)`: Relative link leakage
1249    #[inline]
1250    fn normalize_manifest_path(&self, path: &str) -> Result<PathBuf, EpubError> {
1251        let mut path = if path.starts_with("../") {
1252            let mut current_dir = self.epub_path.join(&self.package_path);
1253            current_dir.pop();
1254
1255            check_realtive_link_leakage(self.epub_path.clone(), current_dir, path)
1256                .map(PathBuf::from)
1257                .ok_or_else(|| EpubError::RealtiveLinkLeakage { path: path.to_string() })?
1258        } else if let Some(path) = path.strip_prefix("/") {
1259            PathBuf::from(path.to_string())
1260        } else {
1261            self.base_path.join(path)
1262        };
1263
1264        #[cfg(windows)]
1265        {
1266            path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
1267        }
1268
1269        Ok(path)
1270    }
1271
1272    /// Verify the fallback chain of all manifest items
1273    ///
1274    /// This function iterates through all manifest items with the fallback
1275    /// attribute and verifies the validity of their fallback chains, including checking:
1276    /// - Whether circular references exist
1277    /// - Whether the fallback resource exists in the manifest
1278    ///
1279    /// ## Notes
1280    /// If an invalid fallback chain is found, a warning log will be logged
1281    /// but the processing flow will not be interrupted.
1282    fn validate_fallback_chains(&self) {
1283        for (id, item) in &self.manifest {
1284            if item.fallback.is_none() {
1285                continue;
1286            }
1287
1288            let mut fallback_chain = Vec::new();
1289            if let Err(msg) = self.validate_fallback_chain(id, &mut fallback_chain) {
1290                warn!("Invalid fallback chain for item {}: {}", id, msg);
1291            }
1292        }
1293    }
1294
1295    /// Recursively verify the validity of a single fallback chain
1296    ///
1297    /// This function recursively traces the fallback chain to check for the following issues:
1298    /// - Circular reference
1299    /// - The referenced fallback resource does not exist
1300    ///
1301    /// ## Parameters
1302    /// - `manifest_id`: The id of the manifest item currently being verified
1303    /// - `fallback_chain`: The visited fallback chain paths used to detect circular references
1304    ///
1305    /// ## Return
1306    /// - `Ok(())`: The fallback chain is valid
1307    /// - `Err(String)`: A string containing error information
1308    fn validate_fallback_chain(
1309        &self,
1310        manifest_id: &str,
1311        fallback_chain: &mut Vec<String>,
1312    ) -> Result<(), String> {
1313        if fallback_chain.contains(&manifest_id.to_string()) {
1314            fallback_chain.push(manifest_id.to_string());
1315
1316            return Err(format!(
1317                "Circular reference detected in fallback chain for {}",
1318                fallback_chain.join("->")
1319            ));
1320        }
1321
1322        // Get the current item; its existence can be ensured based on the calling context.
1323        let item = self.manifest.get(manifest_id).unwrap();
1324
1325        if let Some(fallback_id) = &item.fallback {
1326            if !self.manifest.contains_key(fallback_id) {
1327                return Err(format!(
1328                    "Fallback resource {} does not exist in manifest",
1329                    fallback_id
1330                ));
1331            }
1332
1333            fallback_chain.push(manifest_id.to_string());
1334            self.validate_fallback_chain(fallback_id, fallback_chain)
1335        } else {
1336            // The end of the fallback chain
1337            Ok(())
1338        }
1339    }
1340
1341    /// Checks if a resource at the specified path is an encrypted file
1342    ///
1343    /// This function queries whether a specific resource path is marked as an encrypted
1344    /// file in the EPUB encryption information. It checks the encrypted data stored in
1345    /// `self.encryption`, looking for an entry that matches the given path.
1346    ///
1347    /// ## Parameters
1348    /// - `path`: The path of the resource to check
1349    ///
1350    /// ## Return
1351    /// - `Some(String)`: The encryption method used for the resource
1352    /// - `None`: The resource is not encrypted
1353    fn is_encryption_file(&self, path: &str) -> Option<String> {
1354        self.encryption.as_ref().and_then(|encryptions| {
1355            encryptions
1356                .iter()
1357                .find(|encryption| encryption.data == path)
1358                .map(|encryption| encryption.method.clone())
1359        })
1360    }
1361
1362    /// Automatically decrypts encrypted resource data
1363    ///
1364    /// Automatically decrypts data based on the provided encryption method.
1365    /// This function supports various encryption methods defined by the EPUB
1366    /// specification, including font obfuscation and the XML encryption standard.
1367    ///
1368    /// ## Parameters
1369    /// - `method`: The encryption method used for the resource
1370    /// - `data`: The encrypted resource data
1371    ///
1372    /// ## Return
1373    /// - `Ok(Vec<u8>)`: The decrypted resource data
1374    /// - `Err(EpubError)`: Unsupported encryption method
1375    ///
1376    /// ## Supported Encryption Methods
1377    /// - IDPF font obfuscation: `http://www.idpf.org/2008/embedding`
1378    /// - Adobe font obfuscation: `http://ns.adobe.com/pdf/enc#RC`
1379    #[inline]
1380    fn auto_dencrypt(&self, method: &str, data: &mut [u8]) -> Result<Vec<u8>, EpubError> {
1381        match method {
1382            "http://www.idpf.org/2008/embedding" => {
1383                Ok(idpf_font_dencryption(data, &self.unique_identifier))
1384            }
1385            "http://ns.adobe.com/pdf/enc#RC" => {
1386                Ok(adobe_font_dencryption(data, &self.unique_identifier))
1387            }
1388            _ => Err(EpubError::UnsupportedEncryptedMethod { method: method.to_string() }),
1389        }
1390    }
1391}
1392
1393impl EpubDoc<BufReader<File>> {
1394    /// Creates a new EPUB document instance
1395    ///
1396    /// This function is a convenience constructor for `EpubDoc`,
1397    /// used to create an EPUB parser instance directly from a file path.
1398    ///
1399    /// ## Parameters
1400    /// - `path`: The path to the EPUB file
1401    ///
1402    /// ## Return
1403    /// - `Ok(EpubDoc)`: The created EPUB document instance
1404    /// - `Err(EpubError)`: An error occurred during initialization
1405    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
1406        let file = File::open(&path).map_err(EpubError::from)?;
1407        let path = fs::canonicalize(path)?;
1408
1409        Self::from_reader(BufReader::new(file), path)
1410    }
1411
1412    /// Validates whether a file is a valid EPUB document
1413    ///
1414    /// This function attempts to open and parse the given file as an EPUB document.
1415    /// It performs basic validation to determine if the file conforms to the EPUB specification.
1416    ///
1417    /// ## Parameters
1418    /// - `path`: The path to the file to validate
1419    ///
1420    /// ## Returns
1421    /// - `Ok(true)`: The file is a valid EPUB document
1422    /// - `Ok(false)`: The file exists but is not a valid EPUB (e.g., missing required files,
1423    ///   invalid XML structure, unrecognized version)
1424    /// - `Err(EpubError)`: A critical error occurred (e.g., IO error, ZIP archive error,
1425    ///   encoding error, mutex poison)
1426    pub fn is_valid_epub<P: AsRef<Path>>(path: P) -> Result<bool, EpubError> {
1427        let result = EpubDoc::new(path);
1428
1429        match result {
1430            Ok(_) => Ok(true),
1431            Err(err) if Self::is_outside_error(&err) => Err(err),
1432            Err(_) => Ok(false),
1433        }
1434    }
1435
1436    /// Determines if an error is a "critical" external error that should be propagated
1437    ///
1438    /// ## Error Classification
1439    /// Outside errors (returned as `Err`):
1440    /// - ArchiveError: ZIP archive corruption or read errors
1441    /// - IOError: File system or read errors
1442    /// - MutexError: Thread synchronization errors
1443    /// - Utf8DecodeError: UTF-8 encoding errors
1444    /// - Utf16DecodeError: UTF-16 encoding errors
1445    /// - QuickXmlError: XML parser errors
1446    ///
1447    /// Irrelevant errors (returned as `Ok(false)`): 
1448    /// - these errors could not have occurred in this situation.
1449    /// - EpubBuilderError
1450    /// - WalkDirError
1451    ///
1452    /// Content errors (returned as `Ok(false)`):
1453    /// - All other EpubError variants
1454    fn is_outside_error(err: &EpubError) -> bool {
1455        matches!(
1456            err,
1457            EpubError::ArchiveError { .. }
1458                | EpubError::IOError { .. }
1459                | EpubError::MutexError { .. }
1460                | EpubError::Utf8DecodeError { .. }
1461                | EpubError::Utf16DecodeError { .. }
1462                | EpubError::QuickXmlError { .. }
1463        )
1464    }
1465}
1466
1467#[cfg(test)]
1468mod tests {
1469    use std::{
1470        fs::File,
1471        io::BufReader,
1472        path::{Path, PathBuf},
1473    };
1474
1475    use crate::{epub::EpubDoc, error::EpubError, utils::XmlReader};
1476
1477    /// Section 3.3 package documents
1478    mod package_documents_tests {
1479        use std::{path::Path, sync::atomic::Ordering};
1480
1481        use crate::epub::{EpubDoc, EpubVersion};
1482
1483        /// ID: pkg-collections-unknown
1484        ///
1485        /// The package document contains a collection with an unknown role. The reading system must open the EPUB successfully.
1486        #[test]
1487        fn test_pkg_collections_unknown() {
1488            let epub_file = Path::new("./test_case/pkg-collections-unknown.epub");
1489            let doc = EpubDoc::new(epub_file);
1490            assert!(doc.is_ok());
1491        }
1492
/// ID: pkg-creator-order
///
/// The package document lists several creators. They must not be reported out of
/// order (a reading system may still choose to display only the first one).
#[test]
fn test_pkg_creator_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-creator-order.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let found = book.get_metadata_value("creator");
    assert!(found.is_some());
    let names = found.unwrap();

    // Expected creators, in the order the test asserts them.
    let expected = vec![
        "Dave Cramer",
        "Wendy Reid",
        "Dan Lazin",
        "Ivan Herman",
        "Brady Duga",
    ];
    assert_eq!(names.len(), 5);
    assert_eq!(names, expected);
}
1519
/// ID: pkg-manifest-unknown
///
/// The package document contains a manifest item with unknown properties.
/// Opening the publication must still succeed.
///
/// Note: the function name says "order" although the test case ID is
/// `pkg-manifest-unknown`; the name is kept as-is so existing test filters keep working.
#[test]
fn test_pkg_manifest_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-manifest-unknown.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    assert_eq!(book.manifest.len(), 2);

    // Both declared items resolve...
    for id in ["nav", "content_001"] {
        assert!(book.get_manifest_item(id).is_ok());
    }
    // ...while an id that is not in the manifest does not.
    assert!(book.get_manifest_item("content_002").is_err());
}
1535
/// ID: pkg-meta-unknown
///
/// The package document contains a meta tag with an unknown property.
/// Opening the publication must still succeed.
#[test]
fn test_pkg_meta_unknown() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-meta-unknown.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // The unknown property is still retrievable by its name.
    let referenced_by = book.get_metadata_value("dcterms:isReferencedBy");
    assert!(referenced_by.is_some());
    let referenced_by = referenced_by.unwrap();
    assert_eq!(referenced_by.len(), 1);
    assert_eq!(
        referenced_by,
        vec!["https://www.w3.org/TR/epub-rs/#confreq-rs-pkg-meta-unknown"]
    );

    let modified = book.get_metadata_value("dcterms:modified");
    assert!(modified.is_some());
    let modified = modified.unwrap();
    assert_eq!(modified.len(), 1);
    assert_eq!(modified, vec!["2021-01-11T00:00:00Z"]);

    // A property that is not present yields nothing.
    let absent = book.get_metadata_value("dcterms:title");
    assert!(absent.is_none());
}
1564
/// ID: pkg-meta-whitespace
///
/// The package document's title and creator contain leading and trailing spaces
/// along with excess internal whitespace. Only single spaces must remain.
#[test]
fn test_pkg_meta_white_space() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-meta-whitespace.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let creator = book.get_metadata_value("creator");
    assert!(creator.is_some());
    let creator = creator.unwrap();
    assert_eq!(creator.len(), 1);
    assert_eq!(creator, vec!["Dave Cramer"]);

    let description = book.get_metadata_value("description");
    assert!(description.is_some());
    let description = description.unwrap();
    assert_eq!(description.len(), 1);

    let expected_description = vec![
        "The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases.",
    ];
    assert_eq!(description, expected_description);
}
1592
/// ID: pkg-spine-duplicate-item-hyperlink
///
/// The spine contains several references to the same content document. The reading
/// system must move to the position of the first duplicate in the reading order
/// when following a hyperlink.
#[test]
fn test_pkg_spine_duplicate_item_hyperlink() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pkg-spine-duplicate-item-hyperlink.epub",
    ));
    assert!(opened.is_ok());
    let mut book = opened.unwrap();

    assert_eq!(book.spine.len(), 4);

    // (spine index, manifest id expected at that index)
    let expected = [
        (0, "content_001"),
        (1, "content_002"),
        (2, "content_002"),
        (3, "content_002"),
    ];
    for (index, id) in expected {
        assert_eq!(
            book.navigate_by_spine_index(index).unwrap(),
            book.get_manifest_item(id).unwrap()
        );
    }
}
1621
/// ID: pkg-spine-duplicate-item-rendering
///
/// The spine contains several references to the same content document. Duplicates
/// must not be skipped when stepping through the reading order.
#[test]
fn test_pkg_spine_duplicate_item_rendering() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pkg-spine-duplicate-item-rendering.epub",
    ));
    assert!(opened.is_ok());
    let mut book = opened.unwrap();

    assert_eq!(book.spine.len(), 4);

    // Stepping backwards from the initial position yields nothing.
    assert!(book.spine_prev().is_none());

    // The first forward step succeeds; after two more steps a fourth one yields nothing.
    assert!(book.spine_next().is_some());
    book.spine_next();
    book.spine_next();
    assert!(book.spine_next().is_none());
}
1645
/// ID: pkg-spine-nonlinear-activation
///
/// An itemref in the spine is marked as non-linear. Although it (possibly) cannot be
/// accessed through the table of contents, it can be reached from a link in the
/// XHTML content.
#[test]
fn test_pkg_spine_nonlinear_activation() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pkg-spine-nonlinear-activation.epub",
    ));
    assert!(opened.is_ok());
    let mut book = opened.unwrap();

    // Neither direction yields an item from the initial position.
    let backward = book.spine_prev();
    assert!(backward.is_none());
    let forward = book.spine_next();
    assert!(forward.is_none());

    // Jumping straight to index 1 works...
    let jumped = book.navigate_by_spine_index(1);
    assert!(jumped.is_some());

    // ...and from there, again, neither direction yields an item.
    let backward = book.spine_prev();
    assert!(backward.is_none());
    let forward = book.spine_next();
    assert!(forward.is_none());
}
1663
/// ID: pkg-spine-order
///
/// Basic test of whether spine items are exposed in the correct order. The test
/// fails if content is presented in the order in which the file names sort, or in
/// manifest order rather than spine order.
#[test]
fn test_pkg_spine_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-order.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    assert_eq!(book.spine.len(), 4);

    let idrefs: Vec<String> = book.spine.iter().map(|entry| entry.idref.clone()).collect();
    assert_eq!(
        idrefs,
        vec![
            "d-content_001",
            "c-content_002",
            "b-content_003",
            "a-content_004",
        ]
    );
}
1688
/// ID: pkg-spine-order-svg
///
/// Basic test of whether SVG spine items are exposed in the correct order.
///
/// Walks forward with `spine_next` until it yields nothing. After every step the
/// returned item must equal the manifest item referenced by the spine entry at the
/// current spine index. The walk must stop with the index at 3.
#[test]
fn test_spine_order_svg() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-order-svg.epub"));
    assert!(opened.is_ok());

    let mut book = opened.unwrap();
    assert_eq!(book.spine.len(), 4);

    while let Some(current) = book.spine_next() {
        let index = book.current_spine_index.load(Ordering::Relaxed);

        // Look the item up through the manifest using the idref of the current spine entry.
        let listed = book.get_manifest_item(&book.spine[index].idref);
        assert!(listed.is_ok());
        assert_eq!(current, listed.unwrap());
    }

    assert_eq!(book.current_spine_index.load(Ordering::Relaxed), 3);
}
1718
/// ID: pkg-spine-unknown
///
/// The package document contains a spine item with unknown properties.
/// Opening the publication must still succeed, and the unknown property value is
/// expected to be kept verbatim on the spine item.
#[test]
fn test_pkg_spine_unknown() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-unknown.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.spine.len(), 1);

    let item = &book.spine[0];
    assert_eq!(item.idref, "content_001");
    assert!(item.id.is_none());
    assert!(item.linear);
    // Compare through `as_deref` so no `String` has to be allocated for the expectation.
    assert_eq!(item.properties.as_deref(), Some("untrustworthy"));
}
1735
/// ID: pkg-title-order
///
/// Several titles are listed in the package document. The first title must be the
/// one used (whether the other titles are used is not defined).
#[test]
fn test_pkg_title_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-title-order.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let titles = book.get_title();
    assert!(titles.is_ok());
    let titles = titles.unwrap();

    let expected = vec![
        "pkg-title-order",
        "This title must not display first",
        "Also, this title must not display first",
        "This title also must not display first",
        "This title must also not display first",
        "This title must not display first, also",
    ];
    assert_eq!(titles.len(), 6);
    assert_eq!(titles, expected);
}
1763
/// ID: pkg-unique-id
///
/// The package document's dc:identifier is identical across two publications.
/// Both publications should still be handled independently.
#[test]
fn test_pkg_unique_id() {
    let first = EpubDoc::new(Path::new("./test_case/pkg-unique-id.epub"));
    assert!(first.is_ok());

    let second = EpubDoc::new(Path::new("./test_case/pkg-unique-id_duplicate.epub"));
    assert!(second.is_ok());

    let (first, second) = (first.unwrap(), second.unwrap());

    // Both documents load on their own and report the same identifier.
    assert_eq!(
        first.get_identifier().unwrap(),
        second.get_identifier().unwrap()
    );
    assert_eq!(first.unique_identifier, "pkg-unique-id");
    assert_eq!(second.unique_identifier, "pkg-unique-id");
}
1787
/// ID: pkg-version-backward
///
/// "Reading Systems MUST attempt to process an EPUB Publication whose Package
/// Document version attribute is less than "3.0"". This EPUB has its package version
/// attribute set to "0", to see whether it is still opened.
#[test]
fn test_pkg_version_backward() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-version-backward.epub"));
    assert!(opened.is_ok());

    // The test expects the document to be reported as EPUB 3.0.
    let book = opened.unwrap();
    assert_eq!(book.version, EpubVersion::Version3_0);
}
1800
/// ID: pkg-linked-records
///
/// The title and creator metadata must be taken from the package document. An
/// ONIX 3.0 format linked metadata record exists, but contains neither title nor
/// creator metadata.
///
/// The test checks that three metadata links are parsed and that one of them
/// carries the `onix` property.
#[test]
fn test_pkg_linked_records() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-linked-records.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.metadata_link.len(), 3);

    // Links without a `properties` value never match.
    let has_onix_link = book
        .metadata_link
        .iter()
        .any(|link| matches!(&link.properties, Some(properties) if properties.eq("onix")));
    assert!(has_onix_link);
}
1822
/// ID: pkg-manifest-unlisted-resource
///
/// The XHTML content references an image that does not appear in the manifest.
/// The image should not be shown.
///
/// Looking the unlisted image up by path must fail with a "Resource not found"
/// error, while a listed content document resolves normally.
#[test]
fn test_pkg_manifest_unlisted_resource() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pkg-manifest-unlisted-resource.epub",
    ));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    assert!(
        book.get_manifest_item_by_path("EPUB/content_001.xhtml")
            .is_ok()
    );

    // Look the unlisted image up once and inspect that single result.
    let missing = book.get_manifest_item_by_path("EPUB/red.png");
    assert!(missing.is_err());
    assert_eq!(
        missing.unwrap_err().to_string(),
        "Resource not found: Unable to find resource from \"EPUB/red.png\"."
    );
}
1845    }
1846
1847    /// Section 3.4 manifest fallbacks
1848    ///
1849    /// The tests under this module seem to favor the reading system rather than the EPUB format itself
1850    mod manifest_fallbacks_tests {
1851        use std::path::Path;
1852
1853        use crate::epub::EpubDoc;
1854
/// ID: pub-foreign_bad-fallback
///
/// This is a test of manifest fallbacks where both the spine item and the fallback
/// are likely to be unsupported. The spine item is a DMG, with a fallback to a PSD
/// file. Reading systems may raise an error in the ingestion workflow.
#[test]
fn test_pub_foreign_bad_fallback() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_bad-fallback.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // Both manifest entries resolve on their own.
    assert!(book.get_manifest_item("content_001").is_ok());
    assert!(book.get_manifest_item("bar").is_ok());

    // Asking for XHTML only must fail with the "No supported file format" error.
    let failure = book
        .get_manifest_item_with_fallback("content_001", vec!["application/xhtml+xml"])
        .unwrap_err();
    assert_eq!(
        failure.to_string(),
        "No supported file format: The fallback resource does not contain the file format you support."
    );
}
1875
/// ID: pub-foreign_image
///
/// An HTML content file contains a PSD image, with a manifest fallback to a PNG
/// image. This tests fallbacks for resources that are not in the spine.
#[test]
fn test_pub_foreign_image() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_image.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let supported = vec!["image/png", "application/xhtml+xml"];
    let resolved = book.get_manifest_item_with_fallback("image-tiff", supported);
    assert!(resolved.is_ok());

    // Only the reported media type matters here; the payload is ignored.
    let (_, media_type) = resolved.unwrap();
    assert_eq!(media_type, "image/png");
}
1895
/// ID: pub-foreign_json-spine
///
/// This EPUB uses a JSON content file in the spine, with a manifest fallback to an
/// HTML document. If JSON is not supported, the HTML should be used instead.
#[test]
fn test_pub_foreign_json_spine() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_json-spine.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // With JSON among the supported types, the primary resource is reported as JSON.
    let with_json = book.get_manifest_item_with_fallback(
        "content_primary",
        vec!["application/xhtml+xml", "application/json"],
    );
    assert!(with_json.is_ok());
    let (_, media_type) = with_json.unwrap();
    assert_eq!(media_type, "application/json");

    // With XHTML as the only supported type, the result is reported as XHTML.
    let xhtml_only =
        book.get_manifest_item_with_fallback("content_primary", vec!["application/xhtml+xml"]);
    assert!(xhtml_only.is_ok());
    let (_, media_type) = xhtml_only.unwrap();
    assert_eq!(media_type, "application/xhtml+xml");
}
1920
/// ID: pub-foreign_xml-spine
///
/// This EPUB uses an ordinary XML content file with mimetype application/xml in the
/// spine, with a manifest fallback to an HTML document. If XML is not supported,
/// the HTML should be used instead.
#[test]
fn test_pub_foreign_xml_spine() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_xml-spine.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // With XML among the supported types, the primary resource is reported as XML.
    let with_xml = book.get_manifest_item_with_fallback(
        "content_primary",
        vec!["application/xhtml+xml", "application/xml"],
    );
    assert!(with_xml.is_ok());
    let (_, media_type) = with_xml.unwrap();
    assert_eq!(media_type, "application/xml");

    // With XHTML as the only supported type, the result is reported as XHTML.
    let xhtml_only =
        book.get_manifest_item_with_fallback("content_primary", vec!["application/xhtml+xml"]);
    assert!(xhtml_only.is_ok());
    let (_, media_type) = xhtml_only.unwrap();
    assert_eq!(media_type, "application/xhtml+xml");
}
1945
/// ID: pub-foreign_xml-suffix-spine
///
/// This EPUB uses a custom XML content file with mimetype application/dtc+xml in the
/// spine, with a manifest fallback to an HTML document. If that XML type is not
/// supported, the HTML should be used instead.
#[test]
fn test_pub_foreign_xml_suffix_spine() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_xml-suffix-spine.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // With the custom type among the supported types, the primary resource is reported as such.
    let with_custom_xml = book.get_manifest_item_with_fallback(
        "content_primary",
        vec!["application/xhtml+xml", "application/dtc+xml"],
    );
    assert!(with_custom_xml.is_ok());
    let (_, media_type) = with_custom_xml.unwrap();
    assert_eq!(media_type, "application/dtc+xml");

    // With XHTML as the only supported type, the result is reported as XHTML.
    let xhtml_only =
        book.get_manifest_item_with_fallback("content_primary", vec!["application/xhtml+xml"]);
    assert!(xhtml_only.is_ok());
    let (_, media_type) = xhtml_only.unwrap();
    assert_eq!(media_type, "application/xhtml+xml");
}
1970    }
1971
1972    /// Section 3.9 open container format
1973    mod open_container_format_tests {
1974        use std::{cmp::min, io::Read, path::Path};
1975
1976        use sha1::{Digest, Sha1};
1977
1978        use crate::epub::EpubDoc;
1979
/// ID: ocf-metainf-inc
///
/// An extra configuration file, not in the reserved files' list, is added to the
/// META-INF folder; this file must be ignored, so opening must succeed.
#[test]
fn test_ocf_metainf_inc() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-metainf-inc.epub"));
    assert!(opened.is_ok());
}
1989
/// ID: ocf-metainf-manifest
///
/// An ancillary manifest file, containing an extra spine item, is present in the
/// META-INF directory; this extra item must be ignored. The test only checks that
/// the publication opens.
#[test]
fn test_ocf_metainf_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-metainf-manifest.epub"));
    assert!(opened.is_ok());
}
1999
/// ID: ocf-package_arbitrary
///
/// The EPUB contains three valid package files and three corresponding sets of
/// content documents, but only one of the packages, in an unusual subdirectory, is
/// referenced by the container.xml file. That package must be the one used.
#[test]
fn test_ocf_package_arbitrary() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_arbitrary.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let expected_package = Path::new("FOO/BAR/package.opf");
    assert_eq!(book.package_path, expected_package);
}
2012
/// ID: ocf-package_multiple
///
/// The EPUB contains three valid package files and three corresponding sets of
/// content documents, all referenced by the container.xml file. The first package
/// must be the one used.
#[test]
fn test_ocf_package_multiple() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_multiple.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let expected_package = Path::new("FOO/BAR/package.opf");
    let expected_base = Path::new("FOO/BAR");
    assert_eq!(book.package_path, expected_package);
    assert_eq!(book.base_path, expected_base);
}
2026
/// ID: ocf-url_link-leaking-relative
///
/// Use a relative link with several double-dot path segments from the content to a
/// photograph. The folder hierarchy containing the photograph starts at the root
/// level; the relative image reference exceeds the depth of the hierarchy.
///
/// Opening such a publication is expected to fail with a "Relative link leakage" error.
#[test]
fn test_ocf_url_link_leaking_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-leaking-relative.epub"));
    assert!(opened.is_err());

    let message = opened.err().unwrap().to_string();
    assert_eq!(
        message,
        "Relative link leakage: Path \"../../../../media/imgs/monastery.jpg\" is out of container range."
    );
}
2042
/// ID: ocf-url_link-path-absolute
///
/// Use a path-absolute link, i.e., beginning with a leading slash, from the content
/// to a photograph. The folder hierarchy containing the photograph starts at the
/// root level.
#[test]
fn test_ocf_url_link_path_absolute() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-path-absolute.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // The manifest path of the photo is expected without the leading slash.
    let photo = book.manifest.get("photo").unwrap();
    let expected_path = Path::new("media/imgs/monastery.jpg");
    assert_eq!(photo.path, expected_path);
}
2056
/// ID: ocf-url_link-relative
///
/// A simple relative link from the content to a photograph. The folder hierarchy
/// containing the photograph starts at the root level.
#[test]
fn test_ocf_url_link_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-relative.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let photo = book.manifest.get("photo").unwrap();
    let expected_path = Path::new("media/imgs/monastery.jpg");
    assert_eq!(photo.path, expected_path);
}
2070
/// ID: ocf-url_manifest
///
/// The manifest refers to an XHTML file in an arbitrary subfolder. The content
/// must still be found.
#[test]
fn test_ocf_url_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_manifest.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let nav = book.get_manifest_item("nav");
    assert!(nav.is_ok());

    let first_chapter = book.get_manifest_item("content_001");
    assert!(first_chapter.is_ok());

    // An id that is not in the manifest must not resolve.
    let undeclared = book.get_manifest_item("content_002");
    assert!(undeclared.is_err());
}
2085
/// ID: ocf-url_relative
///
/// The manifest refers to an XHTML file in an arbitrary subfolder that is relative
/// to the package's own arbitrary folder. The content must still be found.
#[test]
fn test_ocf_url_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_relative.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    assert_eq!(book.package_path, Path::new("foo/BAR/baz.opf"));
    assert_eq!(book.base_path, Path::new("foo/BAR"));

    // Manifest paths are expected to include the package's folder.
    let nav = book.manifest.get("nav").unwrap();
    assert_eq!(nav.path, Path::new("foo/BAR/nav.xhtml"));

    let first_chapter = book.manifest.get("content_001").unwrap();
    assert_eq!(
        first_chapter.path,
        Path::new("foo/BAR/qux/content_001.xhtml")
    );

    // Both entries can also be fetched by id.
    for id in ["nav", "content_001"] {
        assert!(book.get_manifest_item(id).is_ok());
    }
}
2109
/// ID: ocf-zip-comp
///
/// MUST treat any OCF ZIP container that uses compression techniques other than
/// Deflate as in error.
///
/// The fixture itself does not use any method other than Deflate, so this test only
/// confirms that it opens; it cannot show whether the rejection works.
#[test]
fn test_ocf_zip_comp() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-zip-comp.epub"));
    assert!(opened.is_ok());
}
2120
/// ID: ocf-zip-mult
///
/// MUST treat any OCF ZIP container that splits the content into segments as in error.
///
/// The fixture is not a segmented container, so this test only confirms that it
/// opens; it cannot show whether the rejection works.
#[test]
fn test_ocf_zip_mult() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-zip-mult.epub"));
    assert!(opened.is_ok());
}
2131
/// ID: ocf-font_obfuscation
///
/// An obfuscated (TrueType) font should be displayed after de-obfuscation.
///
/// The test rebuilds the XOR key from the SHA-1 digest of the publication's unique
/// identifier, applies it to at most the first 1040 bytes of the resource named by
/// the single encryption entry, and expects the result to start with a known font
/// signature.
#[test]
fn test_ocf_font_obfuscation() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-font_obfuscation.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    // Key material: the SHA-1 digest of the unique identifier, repeated to fill 1040 bytes.
    let mut hasher = Sha1::new();
    hasher.update(doc.unique_identifier.as_bytes());
    let digest = hasher.finalize();
    let key: Vec<u8> = digest.iter().copied().cycle().take(1040).collect();

    // Borrow the encryption list instead of moving it out of `doc`.
    let encryption = doc.encryption.as_ref();
    assert!(encryption.is_some());
    let encryption = encryption.unwrap();
    assert_eq!(encryption.len(), 1);

    let data = &encryption[0];
    assert_eq!(data.method, "http://www.idpf.org/2008/embedding");

    // Read the stored resource in one go rather than byte by byte.
    let mut font = Vec::new();
    let read = doc
        .archive
        .lock()
        .unwrap()
        .by_name(&data.data)
        .unwrap()
        .read_to_end(&mut font);
    assert!(read.is_ok());

    // Per the EPUB specification the obfuscation is applied directly to the font
    // file, so the bytes are de-obfuscated as read, with no extra decompression step.
    let span = min(1040, font.len());
    for (byte, mask) in font[..span].iter_mut().zip(&key) {
        *byte ^= *mask;
    }

    assert!(is_valid_font(&font));
}
2177
/// ID: ocf-font_obfuscation-bis
///
/// An obfuscated (TrueType) font should not be displayed after de-obfuscation,
/// because the obfuscation used a different publication id.
///
/// The test derives the XOR key from a deliberately wrong identifier, applies it to
/// at most the first 1040 bytes of the resource named by the single encryption
/// entry, and expects the result NOT to start with a known font signature.
#[test]
fn test_ocf_font_obfuscation_bis() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-font_obfuscation_bis.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    // Key material: the SHA-1 digest of a wrong identifier, repeated to fill 1040 bytes.
    let wrong_unique_id = "wrong-publication-id";
    let mut hasher = Sha1::new();
    hasher.update(wrong_unique_id.as_bytes());
    let digest = hasher.finalize();
    let wrong_key: Vec<u8> = digest.iter().copied().cycle().take(1040).collect();

    // Borrow the encryption list instead of moving it out of `doc`.
    let encryption = doc.encryption.as_ref();
    assert!(encryption.is_some());
    let encryption = encryption.unwrap();
    assert_eq!(encryption.len(), 1);

    let data = &encryption[0];
    assert_eq!(data.method, "http://www.idpf.org/2008/embedding");

    // Read the stored resource in one go rather than byte by byte.
    let mut font = Vec::new();
    let read = doc
        .archive
        .lock()
        .unwrap()
        .by_name(&data.data)
        .unwrap()
        .read_to_end(&mut font);
    assert!(read.is_ok());

    // De-obfuscate with the wrong key.
    let span = min(1040, font.len());
    for (byte, mask) in font[..span].iter_mut().zip(&wrong_key) {
        *byte ^= *mask;
    }

    assert!(!is_valid_font(&font));
}
2223
2224        fn is_valid_font(data: &[u8]) -> bool {
2225            if data.len() < 4 {
2226                return false;
2227            }
2228            let sig = &data[0..4];
2229            // OTF: "OTTO"
2230            // TTF: 0x00010000, 0x00020000, "true", "typ1"
2231            sig == b"OTTO"
2232                || sig == b"\x00\x01\x00\x00"
2233                || sig == b"\x00\x02\x00\x00"
2234                || sig == b"true"
2235                || sig == b"typ1"
2236        }
2237    }
2238
2239    #[test]
2240    fn test_parse_container() {
2241        let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2242        let doc = EpubDoc::new(epub_file);
2243        assert!(doc.is_ok());
2244
2245        // let doc = doc.unwrap();
2246        let container = r#"
2247        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2248            <rootfiles></rootfiles>
2249        </container>
2250        "#
2251        .to_string();
2252
2253        let result = EpubDoc::<BufReader<File>>::parse_container(container);
2254        assert!(result.is_err());
2255        assert_eq!(
2256            result.unwrap_err(),
2257            EpubError::NonCanonicalFile { tag: "rootfile".to_string() }
2258        );
2259
2260        let container = r#"
2261        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2262            <rootfiles>
2263                <rootfile media-type="application/oebps-package+xml"/>
2264            </rootfiles>
2265        </container>
2266        "#
2267        .to_string();
2268
2269        let result = EpubDoc::<BufReader<File>>::parse_container(container);
2270        assert!(result.is_err());
2271        assert_eq!(
2272            result.unwrap_err(),
2273            EpubError::MissingRequiredAttribute {
2274                tag: "rootfile".to_string(),
2275                attribute: "full-path".to_string(),
2276            }
2277        );
2278
2279        let container = r#"
2280        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2281            <rootfiles>
2282                <rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
2283            </rootfiles>
2284        </container>
2285        "#
2286        .to_string();
2287
2288        let result = EpubDoc::<BufReader<File>>::parse_container(container);
2289        assert!(result.is_ok());
2290        assert_eq!(result.unwrap(), PathBuf::from("EPUB/content.opf"))
2291    }
2292
2293    #[test]
2294    fn test_parse_manifest() {
2295        let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
2296        let doc = EpubDoc::new(epub_file);
2297        assert!(doc.is_ok());
2298
2299        let manifest = r#"
2300        <manifest>
2301            <item href="content_001.xhtml" media-type="application/xhtml+xml"/>
2302            <item properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2303        </manifest>
2304        "#;
2305        let mut doc = doc.unwrap();
2306        let element = XmlReader::parse(manifest);
2307        assert!(element.is_ok());
2308
2309        let element = element.unwrap();
2310        let result = doc.parse_manifest(&element);
2311        assert!(result.is_err());
2312        assert_eq!(
2313            result.unwrap_err(),
2314            EpubError::MissingRequiredAttribute {
2315                tag: "item".to_string(),
2316                attribute: "id".to_string(),
2317            },
2318        );
2319
2320        let manifest = r#"
2321        <manifest>
2322            <item id="content_001" media-type="application/xhtml+xml"/>
2323            <item id="nav" properties="nav" media-type="application/xhtml+xml"/>
2324        </manifest>
2325        "#;
2326        let element = XmlReader::parse(manifest);
2327        assert!(element.is_ok());
2328
2329        let element = element.unwrap();
2330        let result = doc.parse_manifest(&element);
2331        assert!(result.is_err());
2332        assert_eq!(
2333            result.unwrap_err(),
2334            EpubError::MissingRequiredAttribute {
2335                tag: "item".to_string(),
2336                attribute: "href".to_string(),
2337            },
2338        );
2339
2340        let manifest = r#"
2341        <manifest>
2342            <item id="content_001" href="content_001.xhtml"/>
2343            <item id="nav" properties="nav" href="nav.xhtml"/>
2344        </manifest>
2345        "#;
2346        let element = XmlReader::parse(manifest);
2347        assert!(element.is_ok());
2348
2349        let element = element.unwrap();
2350        let result = doc.parse_manifest(&element);
2351        assert!(result.is_err());
2352        assert_eq!(
2353            result.unwrap_err(),
2354            EpubError::MissingRequiredAttribute {
2355                tag: "item".to_string(),
2356                attribute: "media-type".to_string(),
2357            },
2358        );
2359
2360        let manifest = r#"
2361        <manifest>
2362            <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
2363            <item id="nav" properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2364        </manifest>
2365        "#;
2366        let element = XmlReader::parse(manifest);
2367        assert!(element.is_ok());
2368
2369        let element = element.unwrap();
2370        let result = doc.parse_manifest(&element);
2371        assert!(result.is_ok());
2372    }
2373
2374    /// Test for function `has_encryption`
2375    #[test]
2376    fn test_fn_has_encryption() {
2377        let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2378        let doc = EpubDoc::new(epub_file);
2379        assert!(doc.is_ok());
2380
2381        let doc = doc.unwrap();
2382        assert!(doc.has_encryption());
2383    }
2384
2385    /// This test is used to detect whether the "META-INF/encryption.xml" file is parsed correctly
2386    #[test]
2387    fn test_fn_parse_encryption() {
2388        let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2389        let doc = EpubDoc::new(epub_file);
2390        assert!(doc.is_ok());
2391
2392        let doc = doc.unwrap();
2393        assert!(doc.encryption.is_some());
2394
2395        let encryption = doc.encryption.unwrap();
2396        assert_eq!(encryption.len(), 1);
2397        assert_eq!(encryption[0].method, "http://www.idpf.org/2008/embedding");
2398        assert_eq!(encryption[0].data, "EPUB/fonts/Lobster.ttf");
2399    }
2400
2401    #[test]
2402    fn test_get_metadata_existing_key() {
2403        let epub_file = Path::new("./test_case/epub-33.epub");
2404        let doc = EpubDoc::new(epub_file);
2405        assert!(doc.is_ok());
2406
2407        let doc = doc.unwrap();
2408
2409        let titles = doc.get_metadata("title");
2410        assert!(titles.is_some());
2411
2412        let titles = titles.unwrap();
2413        assert_eq!(titles.len(), 1);
2414        assert_eq!(titles[0].property, "title");
2415        assert_eq!(titles[0].value, "EPUB 3.3");
2416
2417        let languages = doc.get_metadata("language");
2418        assert!(languages.is_some());
2419
2420        let languages = languages.unwrap();
2421        assert_eq!(languages.len(), 1);
2422        assert_eq!(languages[0].property, "language");
2423        assert_eq!(languages[0].value, "en-us");
2424
2425        let language = doc.get_language();
2426        assert!(language.is_ok());
2427        assert_eq!(language.unwrap(), vec!["en-us"]);
2428    }
2429
2430    #[test]
2431    fn test_get_metadata_nonexistent_key() {
2432        let epub_file = Path::new("./test_case/epub-33.epub");
2433        let doc = EpubDoc::new(epub_file);
2434        assert!(doc.is_ok());
2435
2436        let doc = doc.unwrap();
2437        let metadata = doc.get_metadata("nonexistent");
2438        assert!(metadata.is_none());
2439    }
2440
2441    #[test]
2442    fn test_get_metadata_multiple_items_same_type() {
2443        let epub_file = Path::new("./test_case/epub-33.epub");
2444        let doc = EpubDoc::new(epub_file);
2445        assert!(doc.is_ok());
2446
2447        let doc = doc.unwrap();
2448
2449        let creators = doc.get_metadata("creator");
2450        assert!(creators.is_some());
2451
2452        let creators = creators.unwrap();
2453        assert_eq!(creators.len(), 3);
2454
2455        assert_eq!(creators[0].id, Some("creator_id_0".to_string()));
2456        assert_eq!(creators[0].property, "creator");
2457        assert_eq!(creators[0].value, "Matt Garrish, DAISY Consortium");
2458
2459        assert_eq!(creators[1].id, Some("creator_id_1".to_string()));
2460        assert_eq!(creators[1].property, "creator");
2461        assert_eq!(creators[1].value, "Ivan Herman, W3C");
2462
2463        assert_eq!(creators[2].id, Some("creator_id_2".to_string()));
2464        assert_eq!(creators[2].property, "creator");
2465        assert_eq!(creators[2].value, "Dave Cramer, Invited Expert");
2466    }
2467
2468    #[test]
2469    fn test_get_metadata_with_refinement() {
2470        let epub_file = Path::new("./test_case/epub-33.epub");
2471        let doc = EpubDoc::new(epub_file);
2472        assert!(doc.is_ok());
2473
2474        let doc = doc.unwrap();
2475
2476        let title = doc.get_metadata("title");
2477        assert!(title.is_some());
2478
2479        let title = title.unwrap();
2480        assert_eq!(title.len(), 1);
2481        assert_eq!(title[0].refined.len(), 1);
2482        assert_eq!(title[0].refined[0].property, "title-type");
2483        assert_eq!(title[0].refined[0].value, "main");
2484    }
2485
2486    #[test]
2487    fn test_get_manifest_item_with_fallback() {
2488        let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
2489        let doc = EpubDoc::new(epub_file);
2490        assert!(doc.is_ok());
2491
2492        let doc = doc.unwrap();
2493        assert!(doc.get_manifest_item("content_001").is_ok());
2494        assert!(doc.get_manifest_item("bar").is_ok());
2495
2496        // 当回退链上存在可回退资源时能获取资源
2497        if let Ok((_, mime)) = doc.get_manifest_item_with_fallback("content_001", vec!["image/psd"])
2498        {
2499            assert_eq!(mime, "image/psd");
2500        } else {
2501            assert!(false, "get_manifest_item_with_fallback failed");
2502        }
2503
2504        // 当回退链上不存在可回退资源时无法获取资源
2505        assert_eq!(
2506            doc.get_manifest_item_with_fallback("content_001", vec!["application/xhtml+xml"])
2507                .unwrap_err()
2508                .to_string(),
2509            "No supported file format: The fallback resource does not contain the file format you support."
2510        );
2511    }
2512
2513    #[test]
2514    fn test_get_cover() {
2515        let epub_file = Path::new("./test_case/pkg-cover-image.epub");
2516        let doc = EpubDoc::new(epub_file);
2517        if let Err(err) = &doc {
2518            println!("{}", err);
2519        }
2520        assert!(doc.is_ok());
2521
2522        let doc = doc.unwrap();
2523        let result = doc.get_cover();
2524        assert!(result.is_some());
2525
2526        let (data, mime) = result.unwrap();
2527        assert_eq!(data.len(), 5785);
2528        assert_eq!(mime, "image/jpeg");
2529    }
2530
2531    #[test]
2532    fn test_epub_2() {
2533        let epub_file = Path::new("./test_case/epub-2.epub");
2534        let doc = EpubDoc::new(epub_file);
2535        assert!(doc.is_ok());
2536
2537        let doc = doc.unwrap();
2538
2539        let titles = doc.get_title();
2540        assert!(titles.is_ok());
2541        assert_eq!(titles.unwrap(), vec!["Minimal EPUB 2.0"]);
2542    }
2543
2544    #[test]
2545    fn test_is_valid_epub_valid_file() {
2546        let result = EpubDoc::is_valid_epub("./test_case/epub-2.epub");
2547        assert!(result.is_ok());
2548        assert_eq!(result.unwrap(), true);
2549    }
2550
2551    #[test]
2552    fn test_is_valid_epub_invalid_path() {
2553        let result = EpubDoc::is_valid_epub("./test_case/nonexistent.epub");
2554        assert!(result.is_err());
2555    }
2556
2557    #[test]
2558    fn test_is_valid_epub_corrupted_zip() {
2559        let temp_dir = std::env::temp_dir();
2560        let corrupted_file = temp_dir.join("corrupted.epub");
2561
2562        std::fs::write(&corrupted_file, b"not a valid zip file").unwrap();
2563
2564        let result = EpubDoc::is_valid_epub(&corrupted_file);
2565
2566        assert!(result.is_err());
2567        let err = result.unwrap_err();
2568        assert!(matches!(err, EpubError::ArchiveError { .. }));
2569
2570        std::fs::remove_file(corrupted_file).ok();
2571    }
2572
2573    #[test]
2574    fn test_is_valid_epub_valid_epub_3() {
2575        let result = EpubDoc::is_valid_epub("./test_case/epub-33.epub");
2576        assert!(result.is_ok());
2577        assert_eq!(result.unwrap(), true);
2578    }
2579
2580    #[test]
2581    fn test_is_outside_error() {
2582        let archive_error = EpubError::ArchiveError {
2583            source: zip::result::ZipError::Io(std::io::Error::new(
2584                std::io::ErrorKind::Other,
2585                "test",
2586            )),
2587        };
2588        assert!(EpubDoc::<BufReader<File>>::is_outside_error(&archive_error));
2589
2590        let io_error = EpubError::IOError {
2591            source: std::io::Error::new(std::io::ErrorKind::NotFound, "test"),
2592        };
2593        assert!(EpubDoc::<BufReader<File>>::is_outside_error(&io_error));
2594
2595        let non_canonical = EpubError::NonCanonicalEpub { expected_file: "test".to_string() };
2596        assert!(!EpubDoc::<BufReader<File>>::is_outside_error(
2597            &non_canonical
2598        ));
2599
2600        let missing_attr = EpubError::MissingRequiredAttribute {
2601            tag: "test".to_string(),
2602            attribute: "id".to_string(),
2603        };
2604        assert!(!EpubDoc::<BufReader<File>>::is_outside_error(&missing_attr));
2605    }
2606}