Skip to main content

lib_epub/
epub.rs

1//! The core module of the EPUB parsing library
2//!
3//! This module provides complete parsing functionality for EPUB ebook files
4//! and is the core component of the entire library. The `EpubDoc` structure
5//! encapsulates all the parsing logic and data access interfaces for EPUB files.
6//!
7//! ## Main references to EPUB specs:
8//! - <https://www.w3.org/TR/epub-33>
9//! - <https://idpf.org/epub/201>
10//!
11//! ## Potential Issues
12//! - The generic parameter `R: Read + Seek` increases complexity, particularly
13//!   in asynchronous environments. The current design is not conducive to multi-threaded
14//!   concurrent access and requires an external synchronization mechanism.
15//! - Some error handling may not be sufficiently nuanced, and certain edge cases
16//!   may not be adequately considered.
17//! - Loading the entire EPUB document at once may result in significant memory consumption,
18//!   especially for large publications.
19//!
20//! ## Future Work
21//! - Support more EPUB specification features, such as media overlays and scripting.
22
23use std::{
24    collections::HashMap,
25    fs::{self, File},
26    io::{BufReader, Read, Seek},
27    path::{Path, PathBuf},
28    sync::{
29        Arc, Mutex,
30        atomic::{AtomicUsize, Ordering},
31    },
32};
33
34#[cfg(not(feature = "no-indexmap"))]
35use indexmap::IndexMap;
36use zip::{ZipArchive, result::ZipError};
37
38use crate::{
39    error::EpubError,
40    types::{
41        EncryptionData, EpubVersion, ManifestItem, MetadataItem, MetadataLinkItem,
42        MetadataRefinement, NavPoint, SpineItem,
43    },
44    utils::{
45        DecodeBytes, NormalizeWhitespace, XmlElement, XmlReader, adobe_font_dencryption,
46        check_realtive_link_leakage, compression_method_check, get_file_in_zip_archive,
47        idpf_font_dencryption,
48    },
49};
50
51/// EPUB document parser, representing a loaded and parsed EPUB publication
52///
53/// The `EpubDoc` structure is the core of the entire EPUB parsing library.
54/// It encapsulates all the parsing logic and data access interfaces for EPUB files.
55/// It is responsible for parsing various components of an EPUB, including metadata,
56/// manifests, reading order, table of contents navigation, and encrypted information,
57/// and provides methods for accessing this data.
58///
59/// Provides a unified data access interface for EPUB files, hiding the underlying
60/// file structure and parsing details. Strictly adheres to the EPUB specification
61/// in implementing the parsing logic to ensure compatibility with the standard.
62///
63/// ## Usage
64///
65/// ```rust
66/// use lib_epub::epub::EpubDoc;
67///
68/// let doc = EpubDoc::new("./test_case/epub-33.epub");
69/// assert!(doc.is_ok());
70/// ```
71///
72/// ## Notes
73/// - The `EpubDoc` structure is thread-safe **if and only if** the structure is immutable.
74/// - The fact that `EpubDoc` is mutable has no practical meaning; modifications
75///   to the structure data are not stored in the epub file.
76pub struct EpubDoc<R: Read + Seek> {
77    /// The structure of the epub file that actually holds it
78    pub(crate) archive: Arc<Mutex<ZipArchive<R>>>,
79
80    /// The path to the target epub file
81    pub(crate) epub_path: PathBuf,
82
83    /// The path to the OPF file
84    pub package_path: PathBuf,
85
86    /// The path to the directory where the opf file is located
87    pub base_path: PathBuf,
88
89    /// The epub version
90    pub version: EpubVersion,
91
92    /// The unique identifier of the epub file
93    ///
94    /// This identifier is the actual value of the unique-identifier attribute of the package.
95    pub unique_identifier: String,
96
97    /// Epub metadata extracted from OPF
98    pub metadata: Vec<MetadataItem>,
99
100    /// Data in metadata that points to external files
101    pub metadata_link: Vec<MetadataLinkItem>,
102
103    /// A list of resources contained inside an epub extracted from OPF
104    ///
105    /// All resources in the epub file are declared here, and undeclared resources
106    /// should not be stored in the epub file and cannot be obtained from it.
107    ///
108    /// ## Storage Implementation
109    ///
110    /// By default, this field uses [`IndexMap`] to preserve the original declaration
111    /// order from the OPF file, as recommended by the EPUB specification.
112    ///
113    /// To reduce dependencies, you can enable the `no-indexmap` feature to use
114    /// [`HashMap`] instead. Note that this will not preserve the manifest order.
115    ///
116    /// ## EPUB Specification
117    ///
118    /// Per the <https://www.w3.org/TR/epub-33/#sec-manifest>:
119    ///
120    /// > The order of `item` elements within the manifest is significant for
121    /// > fallback chain processing and should be preserved when processing
122    /// > the publication.
123    #[cfg(not(feature = "no-indexmap"))]
124    pub manifest: IndexMap<String, ManifestItem>,
125    #[cfg(feature = "no-indexmap")]
126    pub manifest: HashMap<String, ManifestItem>,
127
128    /// Physical reading order of publications extracted from OPF
129    ///
130    /// This attribute declares the order in which multiple files
131    /// containing published content should be displayed.
132    pub spine: Vec<SpineItem>,
133
134    /// The encryption.xml extracted from the META-INF directory
135    pub encryption: Option<Vec<EncryptionData>>,
136
137    /// The navigation data of the epub file
138    pub catalog: Vec<NavPoint>,
139
140    /// The title of the catalog
141    pub catalog_title: String,
142
143    /// The index of the current reading spine
144    current_spine_index: AtomicUsize,
145
146    /// Whether the epub file contains encryption information
147    has_encryption: bool,
148}
149
150impl<R: Read + Seek> EpubDoc<R> {
151    /// Creates a new EPUB document instance from a reader
152    ///
153    /// This function is responsible for the core logic of parsing EPUB files,
154    /// including verifying the file format, parsing container information,
155    /// loading the OPF package document, and extracting metadata, manifest,
156    /// reading order, and other core information.
157    ///
158    /// ## Parameters
159    /// - `reader`: The data source that implements the `Read` and `Seek` traits,
160    ///   usually a file or memory buffer
161    /// - `epub_path`: The path to the EPUB file, used for path resolution and validation
162    ///
163    /// ## Return
164    /// - `Ok(EpubDoc<R>)`: The successfully parsed EPUB document object
165    /// - `Err(EpubError)`: Errors encountered during parsing
166    ///
167    /// ## Notes
168    /// - This function assumes the EPUB file structure is valid
169    // TODO: 增加对必需的 metadata 的检查
170    pub fn from_reader(reader: R, epub_path: PathBuf) -> Result<Self, EpubError> {
171        // Parsing process
172        // 1. Verify that the ZIP compression method conforms to the EPUB specification
173        // 2. Parse `META-INF/container.xml` retrieves the location of the OPF file
174        // 3. Parses the OPF file to obtain package documentation information
175        // 4. Extracts version information
176        // 5. Parses metadata, manifest, and spine
177        // 6. Parses encrypted information and directory navigation
178        // 7. Verifies and extracts the unique identifier
179
180        let mut archive = ZipArchive::new(reader).map_err(EpubError::from)?;
181        let epub_path = fs::canonicalize(epub_path)?;
182
183        compression_method_check(&mut archive)?;
184
185        let container =
186            get_file_in_zip_archive(&mut archive, "META-INF/container.xml")?.decode()?;
187        let package_path = Self::parse_container(container)?;
188        let base_path = package_path
189            .parent()
190            .expect("the parent directory of the opf file must exist")
191            .to_path_buf();
192
193        let opf_file = get_file_in_zip_archive(
194            &mut archive,
195            package_path
196                .to_str()
197                .expect("package_path should be valid UTF-8"),
198        )?
199        .decode()?;
200        let package = XmlReader::parse(&opf_file)?;
201
202        let version = Self::determine_epub_version(&package)?;
203        let has_encryption = archive
204            .by_path(Path::new("META-INF/encryption.xml"))
205            .is_ok();
206
207        let mut doc = Self {
208            archive: Arc::new(Mutex::new(archive)),
209            epub_path,
210            package_path,
211            base_path,
212            version,
213            unique_identifier: String::new(),
214            metadata: vec![],
215            metadata_link: vec![],
216
217            #[cfg(feature = "no-indexmap")]
218            manifest: HashMap::new(),
219            #[cfg(not(feature = "no-indexmap"))]
220            manifest: IndexMap::new(),
221
222            spine: vec![],
223            encryption: None,
224            catalog: vec![],
225            catalog_title: String::new(),
226            current_spine_index: AtomicUsize::new(0),
227            has_encryption,
228        };
229
230        let metadata_element = package.find_elements_by_name("metadata").next().unwrap();
231        let manifest_element = package.find_elements_by_name("manifest").next().unwrap();
232        let spine_element = package.find_elements_by_name("spine").next().unwrap();
233
234        doc.parse_metadata(metadata_element)?;
235        doc.parse_manifest(manifest_element)?;
236        doc.parse_spine(spine_element)?;
237        doc.parse_encryption()?;
238        doc.parse_catalog()?;
239
240        // 断言必有唯一标识符
241        doc.unique_identifier = if let Some(uid) = package.get_attr("unique-identifier") {
242            doc.metadata.iter().find(|item| {
243                item.property == "identifier" && item.id.as_ref().is_some_and(|id| id == &uid)
244            })
245        } else {
246            doc.metadata
247                .iter()
248                .find(|item| item.property == "identifier")
249        }
250        .map(|item| item.value.clone())
251        .ok_or_else(|| EpubError::NonCanonicalFile { tag: "dc:identifier".to_string() })?;
252
253        Ok(doc)
254    }
255
256    /// Parse the EPUB container file (META-INF/container.xml)
257    ///
258    /// This function parses the container information in the EPUB file 、
259    /// to extract the path to the OPF package file. According to the EPUB
260    /// specification, the `container.xml` file must exist in the `META-INF`
261    /// directory and contain at least one `rootfile` element pointing to
262    /// the main OPF file. When multiple `rootfile` elements exist, the first
263    /// element pointing to the OPF file is used as the default.
264    ///
265    /// ## Parameters
266    /// - `data`: The content string of the container.xml
267    ///
268    /// ## Return
269    /// - `Ok(PathBuf)`: The path to the successfully parsed OPF file
270    /// - `Err(EpubError)`: Errors encountered during parsing
271    fn parse_container(data: String) -> Result<PathBuf, EpubError> {
272        let root = XmlReader::parse(&data)?;
273        let rootfile = root
274            .find_elements_by_name("rootfile")
275            .next()
276            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "rootfile".to_string() })?;
277
278        let attr =
279            rootfile
280                .get_attr("full-path")
281                .ok_or_else(|| EpubError::MissingRequiredAttribute {
282                    tag: "rootfile".to_string(),
283                    attribute: "full-path".to_string(),
284                })?;
285
286        Ok(PathBuf::from(attr))
287    }
288
289    /// Parse the EPUB metadata section
290    ///
291    /// This function is responsible for parsing the `<metadata>` elements
292    /// in the OPF file to extract basic information about the publication.
293    /// It handles metadata elements from different namespaces:
294    /// - Elements in the Dublin Core namespace (`http://purl.org/dc/elements/1.1/`)
295    /// - Elements in the OPF namespace (`http://www.idpf.org/2007/opf`)
296    ///
297    /// ## Parameters
298    /// - `metadata_element`: A reference to the `<metadata>` element in the OPF file
299    fn parse_metadata(&mut self, metadata_element: &XmlElement) -> Result<(), EpubError> {
300        const DC_NAMESPACE: &str = "http://purl.org/dc/elements/1.1/";
301        const OPF_NAMESPACE: &str = "http://www.idpf.org/2007/opf";
302
303        let mut metadata = Vec::new();
304        let mut metadata_link = Vec::new();
305        let mut refinements = HashMap::<String, Vec<MetadataRefinement>>::new();
306
307        for element in metadata_element.children() {
308            match &element.namespace {
309                Some(namespace) if namespace == DC_NAMESPACE => {
310                    self.parse_dc_metadata(element, &mut metadata)?
311                }
312
313                Some(namespace) if namespace == OPF_NAMESPACE => self.parse_opf_metadata(
314                    element,
315                    &mut metadata,
316                    &mut metadata_link,
317                    &mut refinements,
318                )?,
319
320                _ => {}
321            };
322        }
323
324        for item in metadata.iter_mut() {
325            if let Some(id) = &item.id {
326                if let Some(refinements) = refinements.remove(id) {
327                    item.refined = refinements;
328                }
329            }
330        }
331
332        self.metadata = metadata;
333        self.metadata_link = metadata_link;
334        Ok(())
335    }
336
337    /// Parse the EPUB manifest section
338    ///
339    /// This function parses the `<manifest>` element in the OPF file, extracting
340    /// information about all resource files in the publication. Each resource contains
341    /// basic information such as id, file path, MIME type, as well as optional
342    /// attributes and fallback resource information.
343    ///
344    /// ## Parameters
345    /// - `manifest_element`: A reference to the `<manifest>` element in the OPF file
346    fn parse_manifest(&mut self, manifest_element: &XmlElement) -> Result<(), EpubError> {
347        let estimated_items = manifest_element.children().count();
348        #[cfg(feature = "no-indexmap")]
349        let mut resources = HashMap::with_capacity(estimated_items);
350        #[cfg(not(feature = "no-indexmap"))]
351        let mut resources = IndexMap::with_capacity(estimated_items);
352
353        for element in manifest_element.children() {
354            let id = element
355                .get_attr("id")
356                .ok_or_else(|| EpubError::MissingRequiredAttribute {
357                    tag: element.tag_name(),
358                    attribute: "id".to_string(),
359                })?
360                .to_string();
361            let path = element
362                .get_attr("href")
363                .ok_or_else(|| EpubError::MissingRequiredAttribute {
364                    tag: element.tag_name(),
365                    attribute: "href".to_string(),
366                })?
367                .to_string();
368            let mime = element
369                .get_attr("media-type")
370                .ok_or_else(|| EpubError::MissingRequiredAttribute {
371                    tag: element.tag_name(),
372                    attribute: "media-type".to_string(),
373                })?
374                .to_string();
375            let properties = element.get_attr("properties");
376            let fallback = element.get_attr("fallback");
377
378            resources.insert(
379                id.clone(),
380                ManifestItem {
381                    id,
382                    path: self.normalize_manifest_path(&path)?,
383                    mime,
384                    properties,
385                    fallback,
386                },
387            );
388        }
389
390        self.manifest = resources;
391        self.validate_fallback_chains();
392        Ok(())
393    }
394
395    /// Parse the EPUB spine section
396    ///
397    /// This function parses the `<spine>` elements in the OPF file to extract
398    /// the reading order information of the publication. The spine defines the
399    /// linear reading order of the publication's content documents, and each
400    /// spine item references resources in the manifest.
401    ///
402    /// ## Parameters
403    /// - `spine_element`: A reference to the `<spine>` element in the OPF file
404    fn parse_spine(&mut self, spine_element: &XmlElement) -> Result<(), EpubError> {
405        let mut spine = Vec::new();
406        for element in spine_element.children() {
407            let idref = element
408                .get_attr("idref")
409                .ok_or_else(|| EpubError::MissingRequiredAttribute {
410                    tag: element.tag_name(),
411                    attribute: "idref".to_string(),
412                })?
413                .to_string();
414            let id = element.get_attr("id");
415            let linear = element
416                .get_attr("linear")
417                .map(|linear| linear == "yes")
418                .unwrap_or(true);
419            let properties = element.get_attr("properties");
420
421            spine.push(SpineItem { idref, id, linear, properties });
422        }
423
424        self.spine = spine;
425        Ok(())
426    }
427
428    /// Parse the EPUB encryption file (META-INF/encryption.xml)
429    ///
430    /// This function is responsible for parsing the `encryption.xml` file
431    /// in the `META-INF` directory to extract information about encrypted
432    /// resources in the publication. According to the EPUB specification,
433    /// the encryption information describes which resources are encrypted
434    /// and the encryption methods used.
435    ///
436    /// TODO: 需要对使用非对称加密数据的加密项进行额外处理,以获取非对称加密密钥
437    fn parse_encryption(&mut self) -> Result<(), EpubError> {
438        if !self.has_encryption() {
439            return Ok(());
440        }
441
442        let mut archive = self.archive.lock()?;
443        let encryption_file =
444            get_file_in_zip_archive(&mut archive, "META-INF/encryption.xml")?.decode()?;
445
446        let root = XmlReader::parse(&encryption_file)?;
447
448        let mut encryption_data = Vec::new();
449        for data in root.children() {
450            if data.name != "EncryptedData" {
451                continue;
452            }
453
454            let method = data
455                .find_elements_by_name("EncryptionMethod")
456                .next()
457                .ok_or_else(|| EpubError::NonCanonicalFile {
458                    tag: "EncryptionMethod".to_string(),
459                })?;
460            let reference = data
461                .find_elements_by_name("CipherReference")
462                .next()
463                .ok_or_else(|| EpubError::NonCanonicalFile {
464                    tag: "CipherReference".to_string(),
465                })?;
466
467            encryption_data.push(EncryptionData {
468                method: method
469                    .get_attr("Algorithm")
470                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
471                        tag: "EncryptionMethod".to_string(),
472                        attribute: "Algorithm".to_string(),
473                    })?
474                    .to_string(),
475                data: reference
476                    .get_attr("URI")
477                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
478                        tag: "CipherReference".to_string(),
479                        attribute: "URI".to_string(),
480                    })?
481                    .to_string(),
482            });
483        }
484
485        if !encryption_data.is_empty() {
486            self.encryption = Some(encryption_data);
487        }
488
489        Ok(())
490    }
491
492    /// Parse the EPUB navigation information
493    ///
494    /// This function is responsible for parsing the navigation information of EPUB
495    /// publications. Different parsing strategies are used depending on the EPUB version:
496    /// - EPUB 2.0: Parses the NCX file to obtain directory information
497    /// - EPUB 3.0: Parses the Navigation Document (NAV) file to obtain directory information
498    fn parse_catalog(&mut self) -> Result<(), EpubError> {
499        const HEAD_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];
500
501        let mut archive = self.archive.lock()?;
502        match self.version {
503            EpubVersion::Version2_0 => {
504                let opf_file =
505                    get_file_in_zip_archive(&mut archive, self.package_path.to_str().unwrap())?
506                        .decode()?;
507                let opf_element = XmlReader::parse(&opf_file)?;
508
509                let toc_id = opf_element
510                    .find_children_by_name("spine")
511                    .next()
512                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?
513                    .get_attr("toc")
514                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
515                        tag: "spine".to_string(),
516                        attribute: "toc".to_string(),
517                    })?
518                    .to_owned();
519                let toc_path = self
520                    .manifest
521                    .get(&toc_id)
522                    .ok_or(EpubError::ResourceIdNotExist { id: toc_id })?
523                    .path
524                    .to_str()
525                    .unwrap();
526
527                let ncx_file = get_file_in_zip_archive(&mut archive, toc_path)?.decode()?;
528                let ncx = XmlReader::parse(&ncx_file)?;
529
530                match ncx.find_elements_by_name("docTitle").next() {
531                    Some(element) => self.catalog_title = element.text(),
532                    None => log::warn!(
533                        "Expecting to get docTitle information from the ncx file, but it's missing."
534                    ),
535                };
536
537                let nav_map = ncx
538                    .find_elements_by_name("navMap")
539                    .next()
540                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "navMap".to_string() })?;
541
542                self.catalog = self.parse_nav_points(nav_map)?;
543
544                Ok(())
545            }
546
547            EpubVersion::Version3_0 => {
548                let nav_path = self
549                    .manifest
550                    .values()
551                    .find(|item| {
552                        if let Some(property) = &item.properties {
553                            return property.contains("nav");
554                        }
555                        false
556                    })
557                    .map(|item| item.path.clone())
558                    .ok_or_else(|| EpubError::NonCanonicalEpub {
559                        expected_file: "Navigation Document".to_string(),
560                    })?;
561
562                let nav_file =
563                    get_file_in_zip_archive(&mut archive, nav_path.to_str().unwrap())?.decode()?;
564
565                let nav_element = XmlReader::parse(&nav_file)?;
566                let nav = nav_element
567                    .find_elements_by_name("nav")
568                    .find(|&element| element.get_attr("epub:type") == Some(String::from("toc")))
569                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "nav".to_string() })?;
570                let nav_title = nav.find_children_by_names(&HEAD_TAGS).next();
571                let nav_list = nav
572                    .find_children_by_name("ol")
573                    .next()
574                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: "ol".to_string() })?;
575
576                self.catalog = self.parse_catalog_list(nav_list)?;
577                if let Some(nav_title) = nav_title {
578                    self.catalog_title = nav_title.text();
579                };
580                Ok(())
581            }
582        }
583    }
584
585    /// Check if the EPUB file contains `encryption.xml`
586    ///
587    /// This function determines whether a publication contains encrypted resources
588    /// by checking if a `META-INF/encryption.xml` file exists in the EPUB package.
589    /// According to the EPUB specification, when resources in a publication are
590    /// encrypted, the corresponding encryption information must be declared in
591    /// the `META-INF/encryption.xml` file.
592    ///
593    /// ## Return
594    /// - `true` if the publication contains encrypted resources
595    /// - `false` if the publication does not contain encrypted resources
596    ///
597    /// ## Notes
598    /// - This function only checks the existence of the encrypted file;
599    ///   it does not verify the validity of the encrypted information.
600    #[inline]
601    pub fn has_encryption(&self) -> bool {
602        self.has_encryption
603    }
604
605    /// Retrieves a list of metadata items
606    ///
607    /// This function retrieves all matching metadata items from the EPUB metadata
608    /// based on the specified attribute name (key). Metadata items may come from
609    /// the DC (Dublin Core) namespace or the OPF namespace and contain basic
610    /// information about the publication, such as title, author, identifier, etc.
611    ///
612    /// ## Parameters
613    /// - `key`: The name of the metadata attribute to retrieve
614    ///
615    /// ## Return
616    /// - `Some(Vec<MetadataItem>)`: A vector containing all matching metadata items
617    /// - `None`: If no matching metadata items are found
618    pub fn get_metadata(&self, key: &str) -> Option<Vec<MetadataItem>> {
619        let metadatas = self
620            .metadata
621            .iter()
622            .filter(|item| item.property == key)
623            .cloned()
624            .collect::<Vec<MetadataItem>>();
625
626        (!metadatas.is_empty()).then_some(metadatas)
627    }
628
629    /// Retrieves a list of values for specific metadata items
630    ///
631    /// This function retrieves the values ​​of all matching metadata items from
632    /// the EPUB metadata based on the given property name (key).
633    ///
634    /// ## Parameters
635    /// - `key`: The name of the metadata attribute to retrieve
636    ///
637    /// ## Return
638    /// - `Some(Vec<String>)`: A vector containing all matching metadata item values
639    /// - `None`: If no matching metadata items are found
640    pub fn get_metadata_value(&self, key: &str) -> Option<Vec<String>> {
641        let values = self
642            .metadata
643            .iter()
644            .filter(|item| item.property == key)
645            .map(|item| item.value.clone())
646            .collect::<Vec<String>>();
647
648        (!values.is_empty()).then_some(values)
649    }
650
651    /// Retrieves the title of the publication
652    ///
653    /// This function retrieves all title information from the EPUB metadata.
654    /// According to the EPUB specification, a publication can have multiple titles,
655    /// which are returned in the order they appear in the metadata.
656    ///
657    /// ## Return
658    /// - `Result<Vec<String>, EpubError>`: A vector containing all title information
659    /// - `EpubError`: If and only if the OPF file does not contain `<dc:title>`
660    ///
661    /// ## Notes
662    /// - The EPUB specification requires each publication to have at least one title.
663    #[inline]
664    pub fn get_title(&self) -> Vec<String> {
665        self.get_metadata_value("title")
666            .expect("missing required 'title' metadata which is required by the EPUB specification")
667    }
668
669    /// Retrieves the language used in the publication
670    ///
671    /// This function retrieves the language information of a publication from the EPUB
672    /// metadata. According to the EPUB specification, language information identifies
673    /// the primary language of the publication and can have multiple language identifiers.
674    ///
675    /// ## Return
676    /// - `Ok(Vec<String>)`: A vector containing all language identifiers
677    /// - `Err(EpubError)`: If and only if the OPF file does not contain `<dc:language>`
678    ///
679    /// ## Notes
680    /// - The EPUB specification requires that each publication specify at least one primary language.
681    /// - Language identifiers should conform to RFC 3066 or later standards.
682    #[inline]
683    pub fn get_language(&self) -> Vec<String> {
684        self.get_metadata_value("language").expect(
685            "missing required 'language' metadata which is required by the EPUB specification",
686        )
687    }
688
689    /// Retrieves the identifier of a publication
690    ///
691    /// This function retrieves the identifier information of a publication from
692    /// the EPUB metadata. According to the EPUB specification, each publication
693    /// must have a identifier, typically an ISBN, UUID, or other unique identifier.
694    ///
695    /// ## Return
696    /// - `Ok(Vec<String>)`: A vector containing all identifier information
697    /// - `Err(EpubError)`: If and only if the OPF file does not contain `<dc:identifier>`
698    ///
699    /// ## Notes
700    /// - The EPUB specification requires each publication to have at least one identifier.
701    /// - In the OPF file, the `unique-identifier` attribute of the `<package>` element
702    ///   should point to a `<dc:identifier>` element used to uniquely identify the publication.
703    ///   This means that `unique-identifier` is not exactly equal to `<dc:identifier>`.
704    #[inline]
705    pub fn get_identifier(&self) -> Vec<String> {
706        self.get_metadata_value("identifier").expect(
707            "missing required 'identifier' metadata which is required by the EPUB specification",
708        )
709    }
710
711    /// Retrieve resource data by resource ID
712    ///
713    /// This function will find the resource with the specified ID in the manifest.
714    /// If the resource is encrypted, it will be automatically decrypted.
715    ///
716    /// ## Parameters
717    /// - `id`: The ID of the resource to retrieve
718    ///
719    /// ## Return
720    /// - `Ok((Vec<u8>, String))`: Successfully retrieved and decrypted resource data and
721    ///   the MIME type
722    /// - `Err(EpubError)`: Errors that occurred during the retrieval process
723    ///
724    /// ## Notes
725    /// - This function will automatically decrypt the resource if it is encrypted.
726    /// - For unsupported encryption methods, the corresponding error will be returned.
727    pub fn get_manifest_item(&self, id: &str) -> Result<(Vec<u8>, String), EpubError> {
728        let resource_item = self
729            .manifest
730            .get(id)
731            .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
732
733        self.get_resource(resource_item)
734    }
735
736    /// Retrieves resource item data by resource path
737    ///
738    /// This function retrieves resources from the manifest based on the input path.
739    /// The input path must be a relative path to the root directory of the EPUB container;
740    /// using an absolute path or a relative path to another location will result in an error.
741    ///
742    /// ## Parameters
743    /// - `path`: The path of the resource to retrieve
744    ///
745    /// ## Return
746    /// - `Ok((Vec<u8>, String))`: Successfully retrieved and decrypted resource data and
747    ///   the MIME type
748    /// - `Err(EpubError)`: Errors that occurred during the retrieval process
749    ///
750    /// ## Notes
751    /// - This function will automatically decrypt the resource if it is encrypted.
752    /// - For unsupported encryption methods, the corresponding error will be returned.
753    /// - Relative paths other than the root directory of the Epub container are not supported.
754    pub fn get_manifest_item_by_path(&self, path: &str) -> Result<(Vec<u8>, String), EpubError> {
755        let manifest = self
756            .manifest
757            .iter()
758            .find(|(_, item)| item.path.to_str().unwrap() == path)
759            .map(|(_, manifest)| manifest)
760            .ok_or_else(|| EpubError::ResourceNotFound { resource: path.to_string() })?;
761
762        self.get_resource(manifest)
763    }
764
765    /// Retrieves supported resource items by resource ID, with fallback mechanism supported
766    ///
767    /// This function attempts to retrieve the resource item with the specified ID and
768    /// checks if its MIME type is in the list of supported formats. If the current resource
769    /// format is not supported, it searches for a supported resource format along the
770    /// fallback chain according to the fallback mechanism defined in the EPUB specification.
771    ///
772    /// ## Parameters
773    /// - `id`: The ID of the resource to retrieve
774    /// - `supported_format`: A vector of supported MIME types
775    ///
776    /// ## Return
777    /// - `Ok((Vec<u8>, String))`: Successfully retrieved and decrypted resource data and
778    ///   the MIME type
779    /// - `Err(EpubError)`: Errors that occurred during the retrieval process
780    pub fn get_manifest_item_with_fallback(
781        &self,
782        id: &str,
783        supported_format: &[&str],
784    ) -> Result<(Vec<u8>, String), EpubError> {
785        let mut current_id = id;
786        let mut fallback_chain = Vec::<&str>::new();
787        'fallback: loop {
788            let manifest_item = self
789                .manifest
790                .get(current_id)
791                .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
792
793            if supported_format.contains(&manifest_item.mime.as_str()) {
794                return self.get_resource(manifest_item);
795            }
796
797            let fallback_id = match &manifest_item.fallback {
798                // The loop ends when no fallback resource exists
799                None => break 'fallback,
800
801                // End the loop when the loop continues to fallback if a fallback resource exists
802                Some(id) if fallback_chain.contains(&id.as_str()) => break 'fallback,
803
804                Some(id) => {
805                    fallback_chain.push(id.as_str());
806
807                    // Since only warnings are issued for fallback resource checks
808                    // during initialization, the issue of fallback resources possibly
809                    // not existing needs to be handled here.
810                    id.as_str()
811                }
812            };
813
814            current_id = fallback_id;
815        }
816
817        Err(EpubError::NoSupportedFileFormat)
818    }
819
820    /// Retrieves the cover of the EPUB document
821    ///
822    /// This function searches for the cover of the EPUB document by examining manifest
823    /// items in the manifest. It looks for manifest items whose ID or attribute contains
824    /// "cover" (case-insensitive) and attempts to retrieve the content of the first match.
825    ///
826    /// ## Return
827    /// - `Some((Vec<u8>, String))`: Successfully retrieved and decrypted cover data and
828    ///   the MIME type
829    /// - `None`: No cover resource was found
830    ///
831    /// ## Notes
832    /// - This function only returns the first successfully retrieved cover resource,
833    ///   even if multiple matches exist
834    /// - The retrieved cover may not be an image resource; users need to pay attention
835    ///   to the resource's MIME type.
836    pub fn get_cover(&self) -> Option<(Vec<u8>, String)> {
837        self.manifest
838            .values()
839            .filter(|manifest| {
840                manifest.id.to_ascii_lowercase().contains("cover")
841                    || manifest
842                        .properties
843                        .as_ref()
844                        .map(|properties| properties.to_ascii_lowercase().contains("cover"))
845                        .unwrap_or(false)
846            })
847            .find_map(|manifest| {
848                self.get_resource(manifest)
849                    .map_err(|err| log::warn!("{err}"))
850                    .ok()
851            })
852    }
853
854    /// Retrieves resource data by manifest item
855    fn get_resource(&self, resource_item: &ManifestItem) -> Result<(Vec<u8>, String), EpubError> {
856        let path = resource_item
857            .path
858            .to_str()
859            .expect("manifest item path should be valid UTF-8");
860
861        let mut archive = self.archive.lock()?;
862        let mut data = match archive.by_name(path) {
863            Ok(mut file) => {
864                let mut entry = Vec::<u8>::new();
865                file.read_to_end(&mut entry)?;
866                Ok(entry)
867            }
868            Err(ZipError::FileNotFound) => {
869                Err(EpubError::ResourceNotFound { resource: path.to_string() })
870            }
871            Err(err) => Err(EpubError::from(err)),
872        }?;
873
874        if let Some(method) = self.is_encryption_file(path) {
875            data = self.auto_dencrypt(&method, &mut data)?;
876        }
877
878        Ok((data, resource_item.mime.clone()))
879    }
880
881    /// Navigate to a specified chapter using the spine index
882    ///
883    /// This function retrieves the content data of the corresponding chapter based
884    /// on the index position in the EPUB spine. The spine defines the linear reading
885    /// order of the publication's content documents, and each spine item references
886    /// resources in the manifest.
887    ///
888    /// ## Parameters
889    /// - `index`: The index position in the spine, starting from 0
890    ///
891    /// ## Return
892    /// - `Some((Vec<u8>, String))`: Successfully retrieved chapter content data and the MIME type
893    /// - `None`: Index out of range or data retrieval error
894    ///
895    /// ## Notes
896    /// - The index must be less than the total number of spine projects.
897    /// - If the resource is encrypted, it will be automatically decrypted before returning.
898    /// - It does not check whether the Spine project follows a linear reading order.
899    pub fn navigate_by_spine_index(&mut self, index: usize) -> Option<(Vec<u8>, String)> {
900        if index >= self.spine.len() {
901            return None;
902        }
903
904        let manifest_id = self.spine[index].idref.as_ref();
905        self.current_spine_index.store(index, Ordering::SeqCst);
906        self.get_manifest_item(manifest_id)
907            .map_err(|err| log::warn!("{err}"))
908            .ok()
909    }
910
911    /// Navigate to the previous linear reading chapter
912    ///
913    /// This function searches backwards in the EPUB spine for the previous linear
914    /// reading chapter and returns the content data of that chapter. It only navigates
915    /// to chapters marked as linear reading.
916    ///
917    /// ## Return
918    /// - `Some((Vec<u8>, String))`: Successfully retrieved previous chapter content data and
919    ///   the MIME type
920    /// - `None`: Already in the first chapter, the current chapter is not linear,
921    ///   or data retrieval failed
922    pub fn spine_prev(&self) -> Option<(Vec<u8>, String)> {
923        let current_index = self.current_spine_index.load(Ordering::SeqCst);
924        if current_index == 0 || !self.spine[current_index].linear {
925            return None;
926        }
927
928        let prev_index = (0..current_index)
929            .rev()
930            .find(|&index| self.spine[index].linear)?;
931
932        self.current_spine_index.store(prev_index, Ordering::SeqCst);
933        let manifest_id = self.spine[prev_index].idref.as_ref();
934        self.get_manifest_item(manifest_id)
935            .map_err(|err| log::warn!("{err}"))
936            .ok()
937    }
938
939    /// Navigate to the next linear reading chapter
940    ///
941    /// This function searches forwards in the EPUB spine for the next linear reading
942    /// chapter and returns the content data of that chapter. It only navigates to
943    /// chapters marked as linear reading.
944    ///
945    /// ## Return
946    /// - `Some((Vec<u8>, String))`: Successfully retrieved next chapter content data and
947    ///   the MIME type
948    /// - `None`: Already in the last chapter, the current chapter is not linear,
949    ///   or data retrieval failed
950    pub fn spine_next(&mut self) -> Option<(Vec<u8>, String)> {
951        let current_index = self.current_spine_index.load(Ordering::SeqCst);
952        if current_index >= self.spine.len() - 1 || !self.spine[current_index].linear {
953            return None;
954        }
955
956        let next_index =
957            (current_index + 1..self.spine.len()).find(|&index| self.spine[index].linear)?;
958
959        self.current_spine_index.store(next_index, Ordering::SeqCst);
960        let manifest_id = self.spine[next_index].idref.as_ref();
961        self.get_manifest_item(manifest_id)
962            .map_err(|err| log::warn!("{err}"))
963            .ok()
964    }
965
966    /// Retrieves the content data of the current chapter
967    ///
968    /// This function returns the content data of the chapter at the current
969    /// index position in the EPUB spine.
970    ///
971    /// ## Return
972    /// - `Some((Vec<u8>, String))`: Successfully retrieved current chapter content data and
973    ///   the MIME type
974    /// - `None`: Data retrieval failed
975    pub fn spine_current(&self) -> Option<(Vec<u8>, String)> {
976        let manifest_id = self.spine[self.current_spine_index.load(Ordering::SeqCst)]
977            .idref
978            .as_ref();
979        self.get_manifest_item(manifest_id)
980            .map_err(|err| log::warn!("{err}"))
981            .ok()
982    }
983
984    /// Determine the EPUB version from the OPF file
985    ///
986    /// This function is used to detect the version of an epub file from an OPF file.
987    /// When the version attribute in the package is abnormal, version information will
988    /// be identified through some version characteristics of the epub file. An error is
989    /// returned when neither direct nor indirect methods can identify the version.
990    ///
991    /// ## Parameters
992    /// - `opf_element`: A reference to the OPF file element
993    fn determine_epub_version(opf_element: &XmlElement) -> Result<EpubVersion, EpubError> {
994        // Check the explicit version attribute
995        if let Some(version) = opf_element.get_attr("version") {
996            match version.as_str() {
997                "2.0" => return Ok(EpubVersion::Version2_0),
998                "3.0" => return Ok(EpubVersion::Version3_0),
999                _ => {}
1000            }
1001        }
1002
1003        let spine_element = opf_element
1004            .find_elements_by_name("spine")
1005            .next()
1006            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?;
1007
1008        // Look for EPUB 2.x specific features
1009        if spine_element.get_attr("toc").is_some() {
1010            return Ok(EpubVersion::Version2_0);
1011        }
1012
1013        let manifest_element = opf_element
1014            .find_elements_by_name("manifest")
1015            .next()
1016            .ok_or_else(|| EpubError::NonCanonicalFile { tag: "manifest".to_string() })?;
1017
1018        // Look for EPUB 3.x specific features
1019        manifest_element
1020            .children()
1021            .find_map(|element| {
1022                if let Some(id) = element.get_attr("id") {
1023                    if id.eq("nav") {
1024                        return Some(EpubVersion::Version3_0);
1025                    }
1026                }
1027
1028                None
1029            })
1030            .ok_or(EpubError::UnrecognizedEpubVersion)
1031    }
1032
1033    /// Parse metadata elements under the Dublin Core namespace
1034    ///
1035    /// This function handles the `<metadata>` Dublin Core element in the OPF file (namespace
1036    /// is "http://purl.org/dc/elements/1.1/"). These elements usually contain the basic
1037    /// information of the publication, such as title, author, publication date, etc.
1038    ///
1039    /// ## Notes
1040    /// - In EPUB 3.0, granular information is handled by separate '<meta>' elements and 'refines' attributes
1041    /// - All text content is normalized by whitespace
1042    #[inline]
1043    fn parse_dc_metadata(
1044        &self,
1045        element: &XmlElement,
1046        metadata: &mut Vec<MetadataItem>,
1047        // refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1048    ) -> Result<(), EpubError> {
1049        let id = element.get_attr("id");
1050        let lang = element.get_attr("lang");
1051        let property = element.name.clone();
1052        let value = element.text().normalize_whitespace();
1053
1054        let refined = match self.version {
1055            // In EPUB 2.0, supplementary metadata (refinements) are represented
1056            // through other attribute data pairs of the tag.
1057            EpubVersion::Version2_0 => element
1058                .attributes
1059                .iter()
1060                .map(|(name, value)| {
1061                    let property = name.to_string();
1062                    let value = value.to_string().normalize_whitespace();
1063
1064                    MetadataRefinement {
1065                        refines: id.clone().unwrap(),
1066                        property,
1067                        value,
1068                        lang: None,
1069                        scheme: None,
1070                    }
1071                })
1072                .collect(),
1073            EpubVersion::Version3_0 => vec![],
1074        };
1075
1076        metadata.push(MetadataItem { id, property, value, lang, refined });
1077
1078        Ok(())
1079    }
1080
1081    /// Parse metadata elements under the OPF namespace
1082    ///
1083    /// This function handles the `<metadata>` OPF element in the OPF file (namespace
1084    /// is "http://www.idpf.org/2007/opf"). These elements include '<meta>' and '<link>',
1085    /// which are used to provide extended metadata and links to external resources for EPUB publications.
1086    ///
1087    /// ## Notes
1088    /// - The function is only responsible for distribution processing, and the
1089    ///   specific parsing logic is implemented in the dedicated function
1090    /// - All parsing results are added directly to the incoming collection and no new collection is returned
1091    #[inline]
1092    fn parse_opf_metadata(
1093        &self,
1094        element: &XmlElement,
1095        metadata: &mut Vec<MetadataItem>,
1096        metadata_link: &mut Vec<MetadataLinkItem>,
1097        refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1098    ) -> Result<(), EpubError> {
1099        match element.name.as_str() {
1100            "meta" => self.parse_meta_element(element, metadata, refinements),
1101            "link" => self.parse_link_element(element, metadata_link),
1102            _ => Ok(()),
1103        }
1104    }
1105
1106    #[inline]
1107    fn parse_meta_element(
1108        &self,
1109        element: &XmlElement,
1110        metadata: &mut Vec<MetadataItem>,
1111        refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1112    ) -> Result<(), EpubError> {
1113        match self.version {
1114            EpubVersion::Version2_0 => {
1115                let property = element
1116                    .get_attr("name")
1117                    .ok_or_else(|| EpubError::NonCanonicalFile { tag: element.tag_name() })?;
1118                let value = element
1119                    .get_attr("content")
1120                    .ok_or_else(|| EpubError::MissingRequiredAttribute {
1121                        tag: element.tag_name(),
1122                        attribute: "content".to_string(),
1123                    })?
1124                    .normalize_whitespace();
1125
1126                metadata.push(MetadataItem {
1127                    id: None,
1128                    property,
1129                    value,
1130                    lang: None,
1131                    refined: vec![],
1132                });
1133            }
1134
1135            EpubVersion::Version3_0 => {
1136                let property = element.get_attr("property").ok_or_else(|| {
1137                    EpubError::MissingRequiredAttribute {
1138                        tag: element.tag_name(),
1139                        attribute: "property".to_string(),
1140                    }
1141                })?;
1142                let value = element.text().normalize_whitespace();
1143                let lang = element.get_attr("lang");
1144
1145                if let Some(refines) = element.get_attr("refines") {
1146                    let id = refines.strip_prefix("#").unwrap_or(&refines).to_string();
1147                    let scheme = element.get_attr("scheme");
1148                    let refinement = MetadataRefinement {
1149                        refines: id.clone(),
1150                        property,
1151                        value,
1152                        lang,
1153                        scheme,
1154                    };
1155
1156                    if let Some(refinements) = refinements.get_mut(&id) {
1157                        refinements.push(refinement);
1158                    } else {
1159                        refinements.insert(id, vec![refinement]);
1160                    }
1161                } else {
1162                    let id = element.get_attr("id");
1163                    let item = MetadataItem {
1164                        id,
1165                        property,
1166                        value,
1167                        lang,
1168                        refined: vec![],
1169                    };
1170
1171                    metadata.push(item);
1172                };
1173            }
1174        }
1175        Ok(())
1176    }
1177
1178    #[inline]
1179    fn parse_link_element(
1180        &self,
1181        element: &XmlElement,
1182        metadata_link: &mut Vec<MetadataLinkItem>,
1183    ) -> Result<(), EpubError> {
1184        let href = element
1185            .get_attr("href")
1186            .ok_or_else(|| EpubError::MissingRequiredAttribute {
1187                tag: element.tag_name(),
1188                attribute: "href".to_string(),
1189            })?;
1190        let rel = element
1191            .get_attr("rel")
1192            .ok_or_else(|| EpubError::MissingRequiredAttribute {
1193                tag: element.tag_name(),
1194                attribute: "rel".to_string(),
1195            })?;
1196        let hreflang = element.get_attr("hreflang");
1197        let id = element.get_attr("id");
1198        let mime = element.get_attr("media-type");
1199        let properties = element.get_attr("properties");
1200
1201        metadata_link.push(MetadataLinkItem {
1202            href,
1203            rel,
1204            hreflang,
1205            id,
1206            mime,
1207            properties,
1208            refines: None,
1209        });
1210        Ok(())
1211    }
1212
1213    /// Recursively parse NCX navigation points from navMap or nested navPoint elements
1214    ///
1215    /// This function parses the hierarchical navigation structure defined in NCX files
1216    /// for EPUB 2.x documents. It handles nested navPoint elements to build a complete
1217    /// tree representation of the publication's table of contents.
1218    fn parse_nav_points(&self, parent_element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1219        let mut nav_points = Vec::new();
1220        for nav_point in parent_element.find_children_by_name("navPoint") {
1221            let label = match nav_point.find_children_by_name("navLabel").next() {
1222                Some(element) => element.text(),
1223                None => String::new(),
1224            };
1225
1226            let content = nav_point
1227                .find_children_by_name("content")
1228                .next()
1229                .map(|element| PathBuf::from(element.text()));
1230
1231            let play_order = nav_point
1232                .get_attr("playOrder")
1233                .and_then(|order| order.parse::<usize>().ok());
1234
1235            let children = self.parse_nav_points(nav_point)?;
1236
1237            nav_points.push(NavPoint { label, content, play_order, children });
1238        }
1239
1240        nav_points.sort();
1241        Ok(nav_points)
1242    }
1243
1244    /// Recursively parses directory list structures
1245    ///
1246    /// This function recursively parses HTML navigation list structures,
1247    /// converting `<ol>` and `<li>` elements into NavPoint structures.
1248    /// Multi-level nested directory structures are supported.
1249    fn parse_catalog_list(&self, element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1250        let mut catalog = Vec::new();
1251        for item in element.children() {
1252            if item.tag_name() != "li" {
1253                return Err(EpubError::NonCanonicalFile { tag: "li".to_string() });
1254            }
1255
1256            let title_element = item
1257                .find_children_by_names(&["span", "a"])
1258                .next()
1259                .ok_or_else(|| EpubError::NonCanonicalFile { tag: "span/a".to_string() })?;
1260            let content_href = title_element.get_attr("href").map(PathBuf::from);
1261            let sub_list = if let Some(list) = item.find_children_by_name("ol").next() {
1262                self.parse_catalog_list(list)?
1263            } else {
1264                vec![]
1265            };
1266
1267            catalog.push(NavPoint {
1268                label: title_element.text(),
1269                content: content_href,
1270                children: sub_list,
1271                play_order: None,
1272            });
1273        }
1274
1275        Ok(catalog)
1276    }
1277
1278    /// Converts relative paths in the manifest to normalized paths
1279    /// relative to the EPUB root directory
1280    ///
1281    /// This function processes the href attribute of resources in the EPUB
1282    /// manifest and converts it to a normalized path representation.
1283    /// It handles three types of paths:
1284    /// - Relative paths starting with `../` (checks if they exceed the EPUB package scope)
1285    /// - Absolute paths starting with `/` (relative to the EPUB root directory)
1286    /// - Other relative paths (relative to the directory containing the OPF file)
1287    ///
1288    /// ## Parameters
1289    /// - `path`: The href attribute value of the resource in the manifest
1290    ///
1291    /// ## Return
1292    /// - `Ok(PathBuf)`: The parsed normalized path
1293    /// - `Err(EpubError)`: Relative link leakage
1294    #[inline]
1295    fn normalize_manifest_path(&self, path: &str) -> Result<PathBuf, EpubError> {
1296        let mut path = if path.starts_with("../") {
1297            let mut current_dir = self.epub_path.join(&self.package_path);
1298            current_dir.pop();
1299
1300            check_realtive_link_leakage(self.epub_path.clone(), current_dir, path)
1301                .map(PathBuf::from)
1302                .ok_or_else(|| EpubError::RelativeLinkLeakage { path: path.to_string() })?
1303        } else if let Some(path) = path.strip_prefix("/") {
1304            PathBuf::from(path.to_string())
1305        } else {
1306            self.base_path.join(path)
1307        };
1308
1309        #[cfg(windows)]
1310        {
1311            path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
1312        }
1313
1314        Ok(path)
1315    }
1316
1317    /// Verify the fallback chain of all manifest items
1318    ///
1319    /// This function iterates through all manifest items with the fallback
1320    /// attribute and verifies the validity of their fallback chains, including checking:
1321    /// - Whether circular references exist
1322    /// - Whether the fallback resource exists in the manifest
1323    ///
1324    /// ## Notes
1325    /// If an invalid fallback chain is found, a warning log will be logged
1326    /// but the processing flow will not be interrupted.
1327    // TODO: consider using BFS to validate fallback chains, to provide efficient
1328    fn validate_fallback_chains(&self) {
1329        for (id, item) in &self.manifest {
1330            if item.fallback.is_none() {
1331                continue;
1332            }
1333
1334            let mut fallback_chain = Vec::new();
1335            if let Err(msg) = self.validate_fallback_chain(id, &mut fallback_chain) {
1336                log::warn!("Invalid fallback chain for item {}: {}", id, msg);
1337            }
1338        }
1339    }
1340
1341    /// Recursively verify the validity of a single fallback chain
1342    ///
1343    /// This function recursively traces the fallback chain to check for the following issues:
1344    /// - Circular reference
1345    /// - The referenced fallback resource does not exist
1346    ///
1347    /// ## Parameters
1348    /// - `manifest_id`: The id of the manifest item currently being verified
1349    /// - `fallback_chain`: The visited fallback chain paths used to detect circular references
1350    ///
1351    /// ## Return
1352    /// - `Ok(())`: The fallback chain is valid
1353    /// - `Err(String)`: A string containing error information
1354    fn validate_fallback_chain(
1355        &self,
1356        manifest_id: &str,
1357        fallback_chain: &mut Vec<String>,
1358    ) -> Result<(), String> {
1359        if fallback_chain.contains(&manifest_id.to_string()) {
1360            fallback_chain.push(manifest_id.to_string());
1361
1362            return Err(format!(
1363                "Circular reference detected in fallback chain for {}",
1364                fallback_chain.join("->")
1365            ));
1366        }
1367
1368        // Get the current item; its existence can be ensured based on the calling context.
1369        let item = self.manifest.get(manifest_id).unwrap();
1370
1371        if let Some(fallback_id) = &item.fallback {
1372            if !self.manifest.contains_key(fallback_id) {
1373                return Err(format!(
1374                    "Fallback resource {} does not exist in manifest",
1375                    fallback_id
1376                ));
1377            }
1378
1379            fallback_chain.push(manifest_id.to_string());
1380            self.validate_fallback_chain(fallback_id, fallback_chain)
1381        } else {
1382            // The end of the fallback chain
1383            Ok(())
1384        }
1385    }
1386
1387    /// Checks if a resource at the specified path is an encrypted file
1388    ///
1389    /// This function queries whether a specific resource path is marked as an encrypted
1390    /// file in the EPUB encryption information. It checks the encrypted data stored in
1391    /// `self.encryption`, looking for an entry that matches the given path.
1392    ///
1393    /// ## Parameters
1394    /// - `path`: The path of the resource to check
1395    ///
1396    /// ## Return
1397    /// - `Some(String)`: The encryption method used for the resource
1398    /// - `None`: The resource is not encrypted
1399    fn is_encryption_file(&self, path: &str) -> Option<String> {
1400        self.encryption.as_ref().and_then(|encryptions| {
1401            encryptions
1402                .iter()
1403                .find(|encryption| encryption.data == path)
1404                .map(|encryption| encryption.method.clone())
1405        })
1406    }
1407
1408    /// Automatically decrypts encrypted resource data
1409    ///
1410    /// Automatically decrypts data based on the provided encryption method.
1411    /// This function supports various encryption methods defined by the EPUB
1412    /// specification, including font obfuscation and the XML encryption standard.
1413    ///
1414    /// ## Parameters
1415    /// - `method`: The encryption method used for the resource
1416    /// - `data`: The encrypted resource data
1417    ///
1418    /// ## Return
1419    /// - `Ok(Vec<u8>)`: The decrypted resource data
1420    /// - `Err(EpubError)`: Unsupported encryption method
1421    ///
1422    /// ## Supported Encryption Methods
1423    /// - IDPF font obfuscation: `http://www.idpf.org/2008/embedding`
1424    /// - Adobe font obfuscation: `http://ns.adobe.com/pdf/enc#RC`
1425    #[inline]
1426    fn auto_dencrypt(&self, method: &str, data: &mut [u8]) -> Result<Vec<u8>, EpubError> {
1427        match method {
1428            "http://www.idpf.org/2008/embedding" => {
1429                Ok(idpf_font_dencryption(data, &self.unique_identifier))
1430            }
1431            "http://ns.adobe.com/pdf/enc#RC" => {
1432                Ok(adobe_font_dencryption(data, &self.unique_identifier))
1433            }
1434            _ => Err(EpubError::UnsupportedEncryptedMethod { method: method.to_string() }),
1435        }
1436    }
1437}
1438
1439impl EpubDoc<BufReader<File>> {
1440    /// Creates a new EPUB document instance
1441    ///
1442    /// This function is a convenience constructor for `EpubDoc`,
1443    /// used to create an EPUB parser instance directly from a file path.
1444    ///
1445    /// ## Parameters
1446    /// - `path`: The path to the EPUB file
1447    ///
1448    /// ## Return
1449    /// - `Ok(EpubDoc)`: The created EPUB document instance
1450    /// - `Err(EpubError)`: An error occurred during initialization
1451    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
1452        let file = File::open(&path).map_err(EpubError::from)?;
1453        let path = fs::canonicalize(path)?;
1454
1455        Self::from_reader(BufReader::new(file), path)
1456    }
1457
1458    /// Validates whether a file is a valid EPUB document
1459    ///
1460    /// This function attempts to open and parse the given file as an EPUB document.
1461    /// It performs basic validation to determine if the file conforms to the EPUB specification.
1462    ///
1463    /// ## Parameters
1464    /// - `path`: The path to the file to validate
1465    ///
1466    /// ## Returns
1467    /// - `Ok(true)`: The file is a valid EPUB document
1468    /// - `Ok(false)`: The file exists but is not a valid EPUB (e.g., missing required files,
1469    ///   invalid XML structure, unrecognized version)
1470    /// - `Err(EpubError)`: A critical error occurred (e.g., IO error, ZIP archive error,
1471    ///   encoding error, mutex poison)
1472    pub fn is_valid_epub<P: AsRef<Path>>(path: P) -> Result<bool, EpubError> {
1473        let result = EpubDoc::new(path);
1474
1475        match result {
1476            Ok(_) => Ok(true),
1477            Err(err) if Self::is_outside_error(&err) => Err(err),
1478            Err(_) => Ok(false),
1479        }
1480    }
1481
1482    /// Determines if an error is a "critical" external error that should be propagated
1483    ///
1484    /// ## Error Classification
1485    /// Outside errors (returned as `Err`):
1486    /// - ArchiveError: ZIP archive corruption or read errors
1487    /// - IOError: File system or read errors
1488    /// - MutexError: Thread synchronization errors
1489    /// - Utf8DecodeError: UTF-8 encoding errors
1490    /// - Utf16DecodeError: UTF-16 encoding errors
1491    /// - QuickXmlError: XML parser errors
1492    ///
1493    /// Irrelevant errors (returned as `Ok(false)`):
1494    /// - these errors could not have occurred in this situation.
1495    /// - EpubBuilderError
1496    /// - WalkDirError
1497    ///
1498    /// Content errors (returned as `Ok(false)`):
1499    /// - All other EpubError variants
1500    fn is_outside_error(err: &EpubError) -> bool {
1501        matches!(
1502            err,
1503            EpubError::ArchiveError { .. }
1504                | EpubError::IOError { .. }
1505                | EpubError::MutexError
1506                | EpubError::Utf8DecodeError { .. }
1507                | EpubError::Utf16DecodeError { .. }
1508                | EpubError::QuickXmlError { .. }
1509        )
1510    }
1511}
1512
1513#[cfg(test)]
1514mod tests {
1515    use std::{
1516        fs::File,
1517        io::BufReader,
1518        path::{Path, PathBuf},
1519    };
1520
1521    use crate::{epub::EpubDoc, error::EpubError, utils::XmlReader};
1522
1523    /// Section 3.3 package documents
1524    mod package_documents_tests {
1525        use std::{path::Path, sync::atomic::Ordering};
1526
1527        use crate::epub::{EpubDoc, EpubVersion};
1528
1529        /// ID: pkg-collections-unknown
1530        ///
1531        /// The package document contains a collection with an unknown role. The reading system must open the EPUB successfully.
1532        #[test]
1533        fn test_pkg_collections_unknown() {
1534            let epub_file = Path::new("./test_case/pkg-collections-unknown.epub");
1535            let doc = EpubDoc::new(epub_file);
1536            assert!(doc.is_ok());
1537        }
1538
1539        /// ID: pkg-creator-order
1540        ///
1541        /// Several creators are listed in the package document. The reading system must not display them out of order (but it may display only the first).
1542        #[test]
1543        fn test_pkg_creator_order() {
1544            let epub_file = Path::new("./test_case/pkg-creator-order.epub");
1545            let doc = EpubDoc::new(epub_file);
1546            assert!(doc.is_ok());
1547
1548            let doc = doc.unwrap();
1549            let creators = doc.get_metadata_value("creator");
1550            assert!(creators.is_some());
1551
1552            let creators = creators.unwrap();
1553            assert_eq!(creators.len(), 5);
1554            assert_eq!(
1555                creators,
1556                vec![
1557                    "Dave Cramer",
1558                    "Wendy Reid",
1559                    "Dan Lazin",
1560                    "Ivan Herman",
1561                    "Brady Duga",
1562                ]
1563            );
1564        }
1565
1566        /// ID: pkg-manifest-unknown
1567        ///
1568        /// The package document contains a manifest item with unknown properties. The reading system must open the EPUB successfully.
1569        #[test]
1570        fn test_pkg_manifest_order() {
1571            let epub_file = Path::new("./test_case/pkg-manifest-unknown.epub");
1572            let doc = EpubDoc::new(epub_file);
1573            assert!(doc.is_ok());
1574
1575            let doc = doc.unwrap();
1576            assert_eq!(doc.manifest.len(), 2);
1577            assert!(doc.get_manifest_item("nav").is_ok());
1578            assert!(doc.get_manifest_item("content_001").is_ok());
1579            assert!(doc.get_manifest_item("content_002").is_err());
1580        }
1581
1582        /// ID: pkg-meta-unknown
1583        ///
1584        /// The package document contains a meta tag with an unknown property. The reading system must open the EPUB successfully.
1585        #[test]
1586        fn test_pkg_meta_unknown() {
1587            let epub_file = Path::new("./test_case/pkg-meta-unknown.epub");
1588            let doc = EpubDoc::new(epub_file);
1589            assert!(doc.is_ok());
1590
1591            let doc = doc.unwrap();
1592            let value = doc.get_metadata_value("dcterms:isReferencedBy");
1593            assert!(value.is_some());
1594            let value = value.unwrap();
1595            assert_eq!(value.len(), 1);
1596            assert_eq!(
1597                value,
1598                vec!["https://www.w3.org/TR/epub-rs/#confreq-rs-pkg-meta-unknown"]
1599            );
1600
1601            let value = doc.get_metadata_value("dcterms:modified");
1602            assert!(value.is_some());
1603            let value = value.unwrap();
1604            assert_eq!(value.len(), 1);
1605            assert_eq!(value, vec!["2021-01-11T00:00:00Z"]);
1606
1607            let value = doc.get_metadata_value("dcterms:title");
1608            assert!(value.is_none());
1609        }
1610
1611        /// ID: pkg-meta-whitespace
1612        ///
1613        /// The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases.
1614        #[test]
1615        fn test_pkg_meta_white_space() {
1616            let epub_file = Path::new("./test_case/pkg-meta-whitespace.epub");
1617            let doc = EpubDoc::new(epub_file);
1618            assert!(doc.is_ok());
1619
1620            let doc = doc.unwrap();
1621            let value = doc.get_metadata_value("creator");
1622            assert!(value.is_some());
1623            let value = value.unwrap();
1624            assert_eq!(value.len(), 1);
1625            assert_eq!(value, vec!["Dave Cramer"]);
1626
1627            let value = doc.get_metadata_value("description");
1628            assert!(value.is_some());
1629            let value = value.unwrap();
1630            assert_eq!(value.len(), 1);
1631            assert_eq!(
1632                value,
1633                vec![
1634                    "The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases."
1635                ]
1636            );
1637        }
1638
1639        /// ID: pkg-spine-duplicate-item-hyperlink
1640        ///
1641        /// The spine contains several references to the same content document. The reading system must move to the position of the first duplicate in the reading order when following a hyperlink.
1642        #[test]
1643        fn test_pkg_spine_duplicate_item_hyperlink() {
1644            let epub_file = Path::new("./test_case/pkg-spine-duplicate-item-hyperlink.epub");
1645            let doc = EpubDoc::new(epub_file);
1646            assert!(doc.is_ok());
1647
1648            let mut doc = doc.unwrap();
1649            assert_eq!(doc.spine.len(), 4);
1650            assert_eq!(
1651                doc.navigate_by_spine_index(0).unwrap(),
1652                doc.get_manifest_item("content_001").unwrap()
1653            );
1654            assert_eq!(
1655                doc.navigate_by_spine_index(1).unwrap(),
1656                doc.get_manifest_item("content_002").unwrap()
1657            );
1658            assert_eq!(
1659                doc.navigate_by_spine_index(2).unwrap(),
1660                doc.get_manifest_item("content_002").unwrap()
1661            );
1662            assert_eq!(
1663                doc.navigate_by_spine_index(3).unwrap(),
1664                doc.get_manifest_item("content_002").unwrap()
1665            );
1666        }
1667
1668        /// ID: pkg-spine-duplicate-item-rendering
1669        ///
1670        /// The spine contains several references to the same content document. The reading system must not skip the duplicates when rendering the reading order.
1671        #[test]
1672        fn test_pkg_spine_duplicate_item_rendering() {
1673            let epub_file = Path::new("./test_case/pkg-spine-duplicate-item-rendering.epub");
1674            let doc = EpubDoc::new(epub_file);
1675            assert!(doc.is_ok());
1676
1677            let mut doc = doc.unwrap();
1678            assert_eq!(doc.spine.len(), 4);
1679
1680            let result = doc.spine_prev();
1681            assert!(result.is_none());
1682
1683            let result = doc.spine_next();
1684            assert!(result.is_some());
1685
1686            doc.spine_next();
1687            doc.spine_next();
1688            let result = doc.spine_next();
1689            assert!(result.is_none());
1690        }
1691
1692        /// ID: pkg-spine-nonlinear-activation
1693        ///
1694        /// An itemref in the spine is marked as non-linear. Although it (possibly) cannot be accessed through the table of contents, it can be reached from a link in the XHTML content.
1695        #[test]
1696        fn test_pkg_spine_nonlinear_activation() {
1697            let epub_file = Path::new("./test_case/pkg-spine-nonlinear-activation.epub");
1698            let doc = EpubDoc::new(epub_file);
1699            assert!(doc.is_ok());
1700
1701            let mut doc = doc.unwrap();
1702            assert!(doc.spine_prev().is_none());
1703            assert!(doc.spine_next().is_none());
1704
1705            assert!(doc.navigate_by_spine_index(1).is_some());
1706            assert!(doc.spine_prev().is_none());
1707            assert!(doc.spine_next().is_none());
1708        }
1709
1710        /// ID: pkg-spine-order
1711        ///
1712        /// Basic test of whether a reading system can display spine items in the correct order. The test fails if the reading system presents content in the order in which the file names sort, or if it presents files in manifest order rather than spine order.
1713        #[test]
1714        fn test_pkg_spine_order() {
1715            let epub_file = Path::new("./test_case/pkg-spine-order.epub");
1716            let doc = EpubDoc::new(epub_file);
1717            assert!(doc.is_ok());
1718
1719            let doc = doc.unwrap();
1720            assert_eq!(doc.spine.len(), 4);
1721            assert_eq!(
1722                doc.spine
1723                    .iter()
1724                    .map(|item| item.idref.clone())
1725                    .collect::<Vec<String>>(),
1726                vec![
1727                    "d-content_001",
1728                    "c-content_002",
1729                    "b-content_003",
1730                    "a-content_004",
1731                ]
1732            );
1733        }
1734
1735        /// ID: pkg-spine-order-svg
1736        ///
1737        /// Basic test of whether a reading system can display SVG spine items in the correct order.
1738        #[test]
1739        fn test_spine_order_svg() {
1740            let epub_file = Path::new("./test_case/pkg-spine-order-svg.epub");
1741            let doc = EpubDoc::new(epub_file);
1742            assert!(doc.is_ok());
1743
1744            let mut doc = doc.unwrap();
1745            assert_eq!(doc.spine.len(), 4);
1746
1747            loop {
1748                if let Some(spine) = doc.spine_next() {
1749                    let idref = doc.spine[doc.current_spine_index.load(Ordering::Relaxed)]
1750                        .idref
1751                        .clone();
1752                    let resource = doc.get_manifest_item(&idref);
1753                    assert!(resource.is_ok());
1754
1755                    let resource = resource.unwrap();
1756                    assert_eq!(spine, resource);
1757                } else {
1758                    break;
1759                }
1760            }
1761
1762            assert_eq!(doc.current_spine_index.load(Ordering::Relaxed), 3);
1763        }
1764
1765        /// ID: pkg-spine-unknown
1766        ///
1767        /// The package document contains a spine item with unknown properties. The reading system must open the EPUB successfully.
1768        #[test]
1769        fn test_pkg_spine_unknown() {
1770            let epub_file = Path::new("./test_case/pkg-spine-unknown.epub");
1771            let doc = EpubDoc::new(epub_file);
1772            assert!(doc.is_ok());
1773
1774            let doc = doc.unwrap();
1775            assert_eq!(doc.spine.len(), 1);
1776            assert_eq!(doc.spine[0].idref, "content_001");
1777            assert_eq!(doc.spine[0].id, None);
1778            assert_eq!(doc.spine[0].linear, true);
1779            assert_eq!(doc.spine[0].properties, Some("untrustworthy".to_string()));
1780        }
1781
1782        /// ID: pkg-title-order
1783        ///
1784        /// Several titles are listed in the package document. The reading system must use the first title (and whether to use other titles is not defined).
1785        #[test]
1786        fn test_pkg_title_order() {
1787            let epub_file = Path::new("./test_case/pkg-title-order.epub");
1788            let doc = EpubDoc::new(epub_file);
1789            assert!(doc.is_ok());
1790
1791            let doc = doc.unwrap();
1792            let title_list = doc.get_title();
1793            assert_eq!(title_list.len(), 6);
1794            assert_eq!(
1795                title_list,
1796                vec![
1797                    "pkg-title-order",
1798                    "This title must not display first",
1799                    "Also, this title must not display first",
1800                    "This title also must not display first",
1801                    "This title must also not display first",
1802                    "This title must not display first, also",
1803                ]
1804            );
1805        }
1806
1807        /// ID: pkg-unique-id
1808        ///
1809        /// The package document's dc:identifier is identical across two publications. The reading system should display both publications independently.
1810        #[test]
1811        fn test_pkg_unique_id() {
1812            let epub_file = Path::new("./test_case/pkg-unique-id.epub");
1813            let doc_1 = EpubDoc::new(epub_file);
1814            assert!(doc_1.is_ok());
1815
1816            let epub_file = Path::new("./test_case/pkg-unique-id_duplicate.epub");
1817            let doc_2 = EpubDoc::new(epub_file);
1818            assert!(doc_2.is_ok());
1819
1820            let doc_1 = doc_1.unwrap();
1821            let doc_2 = doc_2.unwrap();
1822
1823            assert_eq!(doc_1.get_identifier(), doc_2.get_identifier());
1824            assert_eq!(doc_1.unique_identifier, "pkg-unique-id");
1825            assert_eq!(doc_2.unique_identifier, "pkg-unique-id");
1826        }
1827
1828        /// ID: pkg-version-backward
1829        ///
1830        /// “Reading Systems MUST attempt to process an EPUB Publication whose Package Document version attribute is less than "3.0"”. This is an EPUB with package version attribute set to "0", to see if a reading system will open it.
1831        #[test]
1832        fn test_pkg_version_backward() {
1833            let epub_file = Path::new("./test_case/pkg-version-backward.epub");
1834            let doc = EpubDoc::new(epub_file);
1835            assert!(doc.is_ok());
1836
1837            let doc = doc.unwrap();
1838            assert_eq!(doc.version, EpubVersion::Version3_0);
1839        }
1840
1841        /// ID: pkg-linked-records
1842        ///
1843        /// Reading System must process and display the title and creator metadata from the package document. An ONIX 3.0 format linked metadata record exists, but contains neither title nor creator metadata.
1844        #[test]
1845        fn test_pkg_linked_records() {
1846            let epub_file = Path::new("./test_case/pkg-linked-records.epub");
1847            let doc = EpubDoc::new(epub_file);
1848            assert!(doc.is_ok());
1849
1850            let doc = doc.unwrap();
1851            assert_eq!(doc.metadata_link.len(), 3);
1852
1853            let item = doc.metadata_link.iter().find(|&item| {
1854                if let Some(properties) = &item.properties {
1855                    properties.eq("onix")
1856                } else {
1857                    false
1858                }
1859            });
1860            assert!(item.is_some());
1861        }
1862
1863        /// ID: pkg-manifest-unlisted-resource
1864        ///
1865        /// The XHTML content references an image that does not appear in the manifest. The image should not be shown.
1866        #[test]
1867        fn test_pkg_manifest_unlisted_resource() {
1868            let epub_file = Path::new("./test_case/pkg-manifest-unlisted-resource.epub");
1869            let doc = EpubDoc::new(epub_file);
1870            assert!(doc.is_ok());
1871
1872            let doc = doc.unwrap();
1873            assert!(
1874                doc.get_manifest_item_by_path("EPUB/content_001.xhtml")
1875                    .is_ok()
1876            );
1877
1878            assert!(doc.get_manifest_item_by_path("EPUB/red.png").is_err());
1879            let err = doc.get_manifest_item_by_path("EPUB/red.png").unwrap_err();
1880            assert_eq!(
1881                err.to_string(),
1882                "Resource not found: Unable to find resource from \"EPUB/red.png\"."
1883            );
1884        }
1885    }
1886
1887    /// Section 3.4 manifest fallbacks
1888    ///
1889    /// The tests under this module seem to favor the reading system rather than the EPUB format itself
1890    mod manifest_fallbacks_tests {
1891        use std::path::Path;
1892
1893        use crate::epub::EpubDoc;
1894
1895        /// ID: pub-foreign_bad-fallback
1896        ///
1897        /// This is a test of manifest fallbacks where both the spine item and the fallback are likely to be unsupported. The spine item is a DMG, with a fallback to a PSD file. Reading systems may raise an error on the ingenstion workflow.
1898        #[test]
1899        fn test_pub_foreign_bad_fallback() {
1900            let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
1901            let doc = EpubDoc::new(epub_file);
1902            assert!(doc.is_ok());
1903
1904            let doc = doc.unwrap();
1905            assert!(doc.get_manifest_item("content_001").is_ok());
1906            assert!(doc.get_manifest_item("bar").is_ok());
1907
1908            assert_eq!(
1909                doc.get_manifest_item_with_fallback("content_001", &vec!["application/xhtml+xml"])
1910                    .unwrap_err()
1911                    .to_string(),
1912                "No supported file format: The fallback resource does not contain the file format you support."
1913            );
1914        }
1915
1916        /// ID: pub-foreign_image
1917        ///
1918        /// An HTML content file contains a PSD image, with a manifest fallback to a PNG image. This tests fallbacks for resources that are not in the spine.
1919        #[test]
1920        fn test_pub_foreign_image() {
1921            let epub_file = Path::new("./test_case/pub-foreign_image.epub");
1922            let doc = EpubDoc::new(epub_file);
1923            assert!(doc.is_ok());
1924
1925            let doc = doc.unwrap();
1926            let result = doc.get_manifest_item_with_fallback(
1927                "image-tiff",
1928                &vec!["image/png", "application/xhtml+xml"],
1929            );
1930            assert!(result.is_ok());
1931
1932            let (_, mime) = result.unwrap();
1933            assert_eq!(mime, "image/png");
1934        }
1935
1936        /// ID: pub-foreign_json-spine
1937        ///
1938        /// This EPUB uses a JSON content file in the spine, with a manifest fallback to an HTML document. If the reading system does not support JSON, it should display the HTML.
1939        #[test]
1940        fn test_pub_foreign_json_spine() {
1941            let epub_file = Path::new("./test_case/pub-foreign_json-spine.epub");
1942            let doc = EpubDoc::new(epub_file);
1943            assert!(doc.is_ok());
1944
1945            let doc = doc.unwrap();
1946            let result = doc.get_manifest_item_with_fallback(
1947                "content_primary",
1948                &vec!["application/xhtml+xml", "application/json"],
1949            );
1950            assert!(result.is_ok());
1951            let (_, mime) = result.unwrap();
1952            assert_eq!(mime, "application/json");
1953
1954            let result = doc
1955                .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
1956            assert!(result.is_ok());
1957            let (_, mime) = result.unwrap();
1958            assert_eq!(mime, "application/xhtml+xml");
1959        }
1960
1961        /// ID: pub-foreign_xml-spine
1962        ///
1963        /// This EPUB uses an ordinary XML content file with mimetype application/xml in the spine, with a manifest fallback to an HTML document. If the reading system does not support XML, it should display the HTML.
1964        #[test]
1965        fn test_pub_foreign_xml_spine() {
1966            let epub_file = Path::new("./test_case/pub-foreign_xml-spine.epub");
1967            let doc = EpubDoc::new(epub_file);
1968            assert!(doc.is_ok());
1969
1970            let doc = doc.unwrap();
1971            let result = doc.get_manifest_item_with_fallback(
1972                "content_primary",
1973                &vec!["application/xhtml+xml", "application/xml"],
1974            );
1975            assert!(result.is_ok());
1976            let (_, mime) = result.unwrap();
1977            assert_eq!(mime, "application/xml");
1978
1979            let result = doc
1980                .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
1981            assert!(result.is_ok());
1982            let (_, mime) = result.unwrap();
1983            assert_eq!(mime, "application/xhtml+xml");
1984        }
1985
1986        /// ID: pub-foreign_xml-suffix-spine
1987        ///
1988        /// This EPUB uses an custom XML content file with mimetype application/dtc+xml in the spine, with a manifest fallback to an HTML document. If the reading system does not support XML, it should display the HTML.
1989        #[test]
1990        fn test_pub_foreign_xml_suffix_spine() {
1991            let epub_file = Path::new("./test_case/pub-foreign_xml-suffix-spine.epub");
1992            let doc = EpubDoc::new(epub_file);
1993            assert!(doc.is_ok());
1994
1995            let doc = doc.unwrap();
1996            let result = doc.get_manifest_item_with_fallback(
1997                "content_primary",
1998                &vec!["application/xhtml+xml", "application/dtc+xml"],
1999            );
2000            assert!(result.is_ok());
2001            let (_, mime) = result.unwrap();
2002            assert_eq!(mime, "application/dtc+xml");
2003
2004            let result = doc
2005                .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
2006            assert!(result.is_ok());
2007            let (_, mime) = result.unwrap();
2008            assert_eq!(mime, "application/xhtml+xml");
2009        }
2010    }
2011
2012    /// Section 3.9 open container format
2013    mod open_container_format_tests {
2014        use std::{cmp::min, io::Read, path::Path};
2015
2016        use sha1::{Digest, Sha1};
2017
2018        use crate::epub::EpubDoc;
2019
/// ID: ocf-metainf-inc
///
/// An extra configuration file, not in the reserved files' list, is added to the META-INF folder; this file must be ignored.
///
/// The only expectation checked here is that the fixture opens without error.
#[test]
fn test_ocf_metainf_inc() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-metainf-inc.epub"));
    assert!(opened.is_ok());
}
2029
/// ID: ocf-metainf-manifest
///
/// An ancillary manifest file, containing an extra spine item, is present in the META-INF directory; this extra item must be ignored by the reading system.
///
/// The only expectation checked here is that the fixture opens without error.
#[test]
fn test_ocf_metainf_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-metainf-manifest.epub"));
    assert!(opened.is_ok());
}
2039
/// ID: ocf-package_arbitrary
///
/// The EPUB contains three valid package files and three corresponding sets of content documents, but only one of the packages, in an unusual subdirectory, is referenced by the container.xml file. The reading system must use this package.
#[test]
fn test_ocf_package_arbitrary() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_arbitrary.epub"));
    assert!(opened.is_ok());

    // The selected package document is expected to be the one under FOO/BAR.
    let epub = opened.unwrap();
    let expected_package = Path::new("FOO/BAR/package.opf");
    assert_eq!(epub.package_path, expected_package);
}
2052
/// ID: ocf-package_multiple
///
/// The EPUB contains three valid package files and three corresponding sets of content documents, all referenced by the container.xml file. The reading system must use the first package.
#[test]
fn test_ocf_package_multiple() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_multiple.epub"));
    assert!(opened.is_ok());

    // Both the package document and its base directory must point at FOO/BAR.
    let epub = opened.unwrap();
    let expected_package = Path::new("FOO/BAR/package.opf");
    let expected_base = Path::new("FOO/BAR");
    assert_eq!(epub.package_path, expected_package);
    assert_eq!(epub.base_path, expected_base);
}
2066
/// ID: ocf-url_link-leaking-relative
///
/// Use a relative link with several double-dot path segments from the content to a photograph. The folder hierarchy containing the photograph starts at the root level; the relative image reference exceeds depth of hierarchy.
///
/// Opening this fixture is expected to fail with the "relative link leakage" error.
#[test]
fn test_ocf_url_link_leaking_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-leaking-relative.epub"));
    assert!(opened.is_err());

    let message = opened.err().unwrap().to_string();
    let expected = String::from(
        "Relative link leakage: Path \"../../../../media/imgs/monastery.jpg\" is out of container range.",
    );
    assert_eq!(message, expected);
}
2082
/// ID: ocf-url_link-path-absolute
///
/// Use a path-absolute link, i.e., beginning with a leading slash, from the content to a photograph. The folder hierarchy containing the photograph starts at the root level.
#[test]
fn test_ocf_url_link_path_absolute() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-path-absolute.epub"));
    assert!(opened.is_ok());

    // The "photo" manifest entry must carry the expected path.
    let epub = opened.unwrap();
    let photo = epub.manifest.get("photo").unwrap();
    let expected_path = Path::new("media/imgs/monastery.jpg");
    assert_eq!(photo.path, expected_path);
}
2096
/// ID: ocf-url_link-relative
///
/// A simple relative link from the content to a photograph. The folder hierarchy containing the photograph starts at the root level.
#[test]
fn test_ocf_url_link_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-relative.epub"));
    assert!(opened.is_ok());

    // The "photo" manifest entry must carry the expected path.
    let epub = opened.unwrap();
    let photo = epub.manifest.get("photo").unwrap();
    let expected_path = Path::new("media/imgs/monastery.jpg");
    assert_eq!(photo.path, expected_path);
}
2110
/// ID: ocf-url_manifest
///
/// The manifest refers to an XHTML file in an arbitrary subfolder. The reading system must be able to find the content.
#[test]
fn test_ocf_url_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_manifest.epub"));
    assert!(opened.is_ok());

    let epub = opened.unwrap();

    // These two ids must resolve to readable resources ...
    let nav = epub.get_manifest_item("nav");
    assert!(nav.is_ok());
    let first_chapter = epub.get_manifest_item("content_001");
    assert!(first_chapter.is_ok());

    // ... while this id must be rejected.
    let missing = epub.get_manifest_item("content_002");
    assert!(missing.is_err());
}
2125
/// ID: ocf-url_relative
///
/// The manifest refers to an XHTML file in an arbitrary subfolder that is relative to the package's own arbitrary folder. The reading system must be able to find the content.
#[test]
fn test_ocf_url_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_relative.epub"));
    assert!(opened.is_ok());

    let epub = opened.unwrap();

    // Location of the package document and the directory it lives in.
    assert_eq!(epub.package_path, Path::new("foo/BAR/baz.opf"));
    assert_eq!(epub.base_path, Path::new("foo/BAR"));

    // Manifest paths are expected to be prefixed with the package directory.
    let nav = epub.manifest.get("nav").unwrap();
    assert_eq!(nav.path, Path::new("foo/BAR/nav.xhtml"));
    let first_chapter = epub.manifest.get("content_001").unwrap();
    assert_eq!(
        first_chapter.path,
        Path::new("foo/BAR/qux/content_001.xhtml")
    );

    // Both resources must also be readable through their ids.
    assert!(epub.get_manifest_item("nav").is_ok());
    assert!(epub.get_manifest_item("content_001").is_ok());
}
2149
/// ID: ocf-zip-comp
///
/// MUST treat any OCF ZIP container that uses compression techniques other than Deflate as in error.
/// This test case does not use compression methods other than Deflate and cannot detect whether it is effective.
///
/// The only expectation checked here is that the fixture opens without error.
#[test]
fn test_ocf_zip_comp() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-zip-comp.epub"));
    assert!(opened.is_ok());
}
2160
/// ID: ocf-zip-mult
///
/// MUST treat any OCF ZIP container that splits the content into segments as in error.
/// This test case is not a segmented OCF ZIP container and cannot be tested to see if it is valid.
///
/// The only expectation checked here is that the fixture opens without error.
#[test]
fn test_ocf_zip_mult() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-zip-mult.epub"));
    assert!(opened.is_ok());
}
2171
/// ID: ocf-font_obfuscation
///
/// An obfuscated (TrueType) font should be displayed after de-obfuscation.
///
/// The test rebuilds the key from the SHA-1 digest of the publication's unique
/// identifier, XORs it over the leading bytes of the stored font file and
/// checks the result with `is_valid_font`.
#[test]
fn test_ocf_font_obfuscation() {
    // Number of leading font bytes that the key is applied to.
    const OBFUSCATED_PREFIX_LEN: usize = 1040;

    let doc = EpubDoc::new(Path::new("./test_case/ocf-font_obfuscation.epub"))
        .expect("ocf-font_obfuscation.epub should open");

    // Key: the digest of the unique identifier, repeated to cover the prefix.
    let mut hasher = Sha1::new();
    hasher.update(doc.unique_identifier.as_bytes());
    let hash = hasher.finalize();
    let key: Vec<u8> = hash
        .iter()
        .copied()
        .cycle()
        .take(OBFUSCATED_PREFIX_LEN)
        .collect();

    let encryption = doc
        .encryption
        .as_ref()
        .expect("encryption data should be present");
    assert_eq!(encryption.len(), 1);

    let data = &encryption[0];
    assert_eq!(data.method, "http://www.idpf.org/2008/embedding");

    // Read the stored font in one go instead of byte by byte.
    let mut font_file = Vec::new();
    doc.archive
        .lock()
        .unwrap()
        .by_name(&data.data)
        .unwrap()
        .read_to_end(&mut font_file)
        .expect("the obfuscated font should be readable from the archive");

    // According to the EPUB specification, obfuscation is applied directly to
    // the font file, so no decompression step is needed before reversing it.
    let prefix_len = min(OBFUSCATED_PREFIX_LEN, font_file.len());
    for (byte, mask) in font_file[..prefix_len].iter_mut().zip(&key) {
        *byte ^= *mask;
    }

    assert!(is_valid_font(&font_file));
}
2217
2218        /// ID: ocf-font_obfuscation-bis
2219        ///
2220        /// An obfuscated (TrueType) font should not be displayed after de-obfuscation, because the obfuscation used a different publication id.
2221        #[test]
2222        fn test_ocf_font_obfuscation_bis() {
2223            let epub_file = Path::new("./test_case/ocf-font_obfuscation_bis.epub");
2224            let doc = EpubDoc::new(epub_file);
2225            assert!(doc.is_ok());
2226
2227            let doc = doc.unwrap();
2228
2229            let wrong_unique_id = "wrong-publication-id";
2230            let mut hasher = Sha1::new();
2231            hasher.update(wrong_unique_id.as_bytes());
2232            let hash = hasher.finalize();
2233            let mut wrong_key = vec![0u8; 1040];
2234            for i in 0..1040 {
2235                wrong_key[i] = hash[i % hash.len()];
2236            }
2237
2238            assert!(doc.encryption.is_some());
2239            assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2240
2241            let data = &doc.encryption.unwrap()[0];
2242            assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2243
2244            let font_file = doc
2245                .archive
2246                .lock()
2247                .unwrap()
2248                .by_name(&data.data)
2249                .unwrap()
2250                .bytes()
2251                .collect::<Result<Vec<u8>, _>>();
2252            assert!(font_file.is_ok());
2253            let font_file = font_file.unwrap();
2254
2255            // 使用错误的密钥进行去混淆
2256            let mut deobfuscated_with_wrong_key = font_file.clone();
2257            for i in 0..std::cmp::min(1040, deobfuscated_with_wrong_key.len()) {
2258                deobfuscated_with_wrong_key[i] ^= wrong_key[i];
2259            }
2260
2261            assert!(!is_valid_font(&deobfuscated_with_wrong_key));
2262        }
2263
2264        fn is_valid_font(data: &[u8]) -> bool {
2265            if data.len() < 4 {
2266                return false;
2267            }
2268            let sig = &data[0..4];
2269            // OTF: "OTTO"
2270            // TTF: 0x00010000, 0x00020000, "true", "typ1"
2271            sig == b"OTTO"
2272                || sig == b"\x00\x01\x00\x00"
2273                || sig == b"\x00\x02\x00\x00"
2274                || sig == b"true"
2275                || sig == b"typ1"
2276        }
2277    }
2278
2279    #[test]
2280    fn test_parse_container() {
2281        let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2282        let doc = EpubDoc::new(epub_file);
2283        assert!(doc.is_ok());
2284
2285        // let doc = doc.unwrap();
2286        let container = r#"
2287        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2288            <rootfiles></rootfiles>
2289        </container>
2290        "#
2291        .to_string();
2292
2293        let result = EpubDoc::<BufReader<File>>::parse_container(container);
2294        assert!(result.is_err());
2295        assert_eq!(
2296            result.unwrap_err(),
2297            EpubError::NonCanonicalFile { tag: "rootfile".to_string() }
2298        );
2299
2300        let container = r#"
2301        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2302            <rootfiles>
2303                <rootfile media-type="application/oebps-package+xml"/>
2304            </rootfiles>
2305        </container>
2306        "#
2307        .to_string();
2308
2309        let result = EpubDoc::<BufReader<File>>::parse_container(container);
2310        assert!(result.is_err());
2311        assert_eq!(
2312            result.unwrap_err(),
2313            EpubError::MissingRequiredAttribute {
2314                tag: "rootfile".to_string(),
2315                attribute: "full-path".to_string(),
2316            }
2317        );
2318
2319        let container = r#"
2320        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2321            <rootfiles>
2322                <rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
2323            </rootfiles>
2324        </container>
2325        "#
2326        .to_string();
2327
2328        let result = EpubDoc::<BufReader<File>>::parse_container(container);
2329        assert!(result.is_ok());
2330        assert_eq!(result.unwrap(), PathBuf::from("EPUB/content.opf"))
2331    }
2332
2333    #[test]
2334    fn test_parse_manifest() {
2335        let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
2336        let doc = EpubDoc::new(epub_file);
2337        assert!(doc.is_ok());
2338
2339        let manifest = r#"
2340        <manifest>
2341            <item href="content_001.xhtml" media-type="application/xhtml+xml"/>
2342            <item properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2343        </manifest>
2344        "#;
2345        let mut doc = doc.unwrap();
2346        let element = XmlReader::parse(manifest);
2347        assert!(element.is_ok());
2348
2349        let element = element.unwrap();
2350        let result = doc.parse_manifest(&element);
2351        assert!(result.is_err());
2352        assert_eq!(
2353            result.unwrap_err(),
2354            EpubError::MissingRequiredAttribute {
2355                tag: "item".to_string(),
2356                attribute: "id".to_string(),
2357            },
2358        );
2359
2360        let manifest = r#"
2361        <manifest>
2362            <item id="content_001" media-type="application/xhtml+xml"/>
2363            <item id="nav" properties="nav" media-type="application/xhtml+xml"/>
2364        </manifest>
2365        "#;
2366        let element = XmlReader::parse(manifest);
2367        assert!(element.is_ok());
2368
2369        let element = element.unwrap();
2370        let result = doc.parse_manifest(&element);
2371        assert!(result.is_err());
2372        assert_eq!(
2373            result.unwrap_err(),
2374            EpubError::MissingRequiredAttribute {
2375                tag: "item".to_string(),
2376                attribute: "href".to_string(),
2377            },
2378        );
2379
2380        let manifest = r#"
2381        <manifest>
2382            <item id="content_001" href="content_001.xhtml"/>
2383            <item id="nav" properties="nav" href="nav.xhtml"/>
2384        </manifest>
2385        "#;
2386        let element = XmlReader::parse(manifest);
2387        assert!(element.is_ok());
2388
2389        let element = element.unwrap();
2390        let result = doc.parse_manifest(&element);
2391        assert!(result.is_err());
2392        assert_eq!(
2393            result.unwrap_err(),
2394            EpubError::MissingRequiredAttribute {
2395                tag: "item".to_string(),
2396                attribute: "media-type".to_string(),
2397            },
2398        );
2399
2400        let manifest = r#"
2401        <manifest>
2402            <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
2403            <item id="nav" properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2404        </manifest>
2405        "#;
2406        let element = XmlReader::parse(manifest);
2407        assert!(element.is_ok());
2408
2409        let element = element.unwrap();
2410        let result = doc.parse_manifest(&element);
2411        assert!(result.is_ok());
2412    }
2413
2414    /// Test for function `has_encryption`
2415    #[test]
2416    fn test_fn_has_encryption() {
2417        let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2418        let doc = EpubDoc::new(epub_file);
2419        assert!(doc.is_ok());
2420
2421        let doc = doc.unwrap();
2422        assert!(doc.has_encryption());
2423    }
2424
2425    /// This test is used to detect whether the "META-INF/encryption.xml" file is parsed correctly
2426    #[test]
2427    fn test_fn_parse_encryption() {
2428        let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2429        let doc = EpubDoc::new(epub_file);
2430        assert!(doc.is_ok());
2431
2432        let doc = doc.unwrap();
2433        assert!(doc.encryption.is_some());
2434
2435        let encryption = doc.encryption.unwrap();
2436        assert_eq!(encryption.len(), 1);
2437        assert_eq!(encryption[0].method, "http://www.idpf.org/2008/embedding");
2438        assert_eq!(encryption[0].data, "EPUB/fonts/Lobster.ttf");
2439    }
2440
2441    #[test]
2442    fn test_get_metadata_existing_key() {
2443        let epub_file = Path::new("./test_case/epub-33.epub");
2444        let doc = EpubDoc::new(epub_file);
2445        assert!(doc.is_ok());
2446
2447        let doc = doc.unwrap();
2448
2449        let titles = doc.get_metadata("title");
2450        assert!(titles.is_some());
2451
2452        let titles = titles.unwrap();
2453        assert_eq!(titles.len(), 1);
2454        assert_eq!(titles[0].property, "title");
2455        assert_eq!(titles[0].value, "EPUB 3.3");
2456
2457        let languages = doc.get_metadata("language");
2458        assert!(languages.is_some());
2459
2460        let languages = languages.unwrap();
2461        assert_eq!(languages.len(), 1);
2462        assert_eq!(languages[0].property, "language");
2463        assert_eq!(languages[0].value, "en-us");
2464
2465        let language = doc.get_language();
2466        assert_eq!(language, vec!["en-us"]);
2467    }
2468
2469    #[test]
2470    fn test_get_metadata_nonexistent_key() {
2471        let epub_file = Path::new("./test_case/epub-33.epub");
2472        let doc = EpubDoc::new(epub_file);
2473        assert!(doc.is_ok());
2474
2475        let doc = doc.unwrap();
2476        let metadata = doc.get_metadata("nonexistent");
2477        assert!(metadata.is_none());
2478    }
2479
2480    #[test]
2481    fn test_get_metadata_multiple_items_same_type() {
2482        let epub_file = Path::new("./test_case/epub-33.epub");
2483        let doc = EpubDoc::new(epub_file);
2484        assert!(doc.is_ok());
2485
2486        let doc = doc.unwrap();
2487
2488        let creators = doc.get_metadata("creator");
2489        assert!(creators.is_some());
2490
2491        let creators = creators.unwrap();
2492        assert_eq!(creators.len(), 3);
2493
2494        assert_eq!(creators[0].id, Some("creator_id_0".to_string()));
2495        assert_eq!(creators[0].property, "creator");
2496        assert_eq!(creators[0].value, "Matt Garrish, DAISY Consortium");
2497
2498        assert_eq!(creators[1].id, Some("creator_id_1".to_string()));
2499        assert_eq!(creators[1].property, "creator");
2500        assert_eq!(creators[1].value, "Ivan Herman, W3C");
2501
2502        assert_eq!(creators[2].id, Some("creator_id_2".to_string()));
2503        assert_eq!(creators[2].property, "creator");
2504        assert_eq!(creators[2].value, "Dave Cramer, Invited Expert");
2505    }
2506
2507    #[test]
2508    fn test_get_metadata_with_refinement() {
2509        let epub_file = Path::new("./test_case/epub-33.epub");
2510        let doc = EpubDoc::new(epub_file);
2511        assert!(doc.is_ok());
2512
2513        let doc = doc.unwrap();
2514
2515        let title = doc.get_metadata("title");
2516        assert!(title.is_some());
2517
2518        let title = title.unwrap();
2519        assert_eq!(title.len(), 1);
2520        assert_eq!(title[0].refined.len(), 1);
2521        assert_eq!(title[0].refined[0].property, "title-type");
2522        assert_eq!(title[0].refined[0].value, "main");
2523    }
2524
2525    #[test]
2526    fn test_get_manifest_item_with_fallback() {
2527        let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
2528        let doc = EpubDoc::new(epub_file);
2529        assert!(doc.is_ok());
2530
2531        let doc = doc.unwrap();
2532        assert!(doc.get_manifest_item("content_001").is_ok());
2533        assert!(doc.get_manifest_item("bar").is_ok());
2534
2535        // 当回退链上存在可回退资源时能获取资源
2536        if let Ok((_, mime)) =
2537            doc.get_manifest_item_with_fallback("content_001", &vec!["image/psd"])
2538        {
2539            assert_eq!(mime, "image/psd");
2540        } else {
2541            assert!(false, "get_manifest_item_with_fallback failed");
2542        }
2543
2544        // 当回退链上不存在可回退资源时无法获取资源
2545        assert_eq!(
2546            doc.get_manifest_item_with_fallback("content_001", &vec!["application/xhtml+xml"])
2547                .unwrap_err()
2548                .to_string(),
2549            "No supported file format: The fallback resource does not contain the file format you support."
2550        );
2551    }
2552
2553    #[test]
2554    fn test_get_cover() {
2555        let epub_file = Path::new("./test_case/pkg-cover-image.epub");
2556        let doc = EpubDoc::new(epub_file);
2557        if let Err(err) = &doc {
2558            println!("{}", err);
2559        }
2560        assert!(doc.is_ok());
2561
2562        let doc = doc.unwrap();
2563        let result = doc.get_cover();
2564        assert!(result.is_some());
2565
2566        let (data, mime) = result.unwrap();
2567        assert_eq!(data.len(), 5785);
2568        assert_eq!(mime, "image/jpeg");
2569    }
2570
2571    #[test]
2572    fn test_epub_2() {
2573        let epub_file = Path::new("./test_case/epub-2.epub");
2574        let doc = EpubDoc::new(epub_file);
2575        assert!(doc.is_ok());
2576
2577        let doc = doc.unwrap();
2578
2579        let titles = doc.get_title();
2580        assert_eq!(titles, vec!["Minimal EPUB 2.0"]);
2581    }
2582
2583    #[test]
2584    fn test_is_valid_epub_valid_file() {
2585        let result = EpubDoc::is_valid_epub("./test_case/epub-2.epub");
2586        assert!(result.is_ok());
2587        assert_eq!(result.unwrap(), true);
2588    }
2589
2590    #[test]
2591    fn test_is_valid_epub_invalid_path() {
2592        let result = EpubDoc::is_valid_epub("./test_case/nonexistent.epub");
2593        assert!(result.is_err());
2594    }
2595
2596    #[test]
2597    fn test_is_valid_epub_corrupted_zip() {
2598        let temp_dir = std::env::temp_dir();
2599        let corrupted_file = temp_dir.join("corrupted.epub");
2600
2601        std::fs::write(&corrupted_file, b"not a valid zip file").unwrap();
2602
2603        let result = EpubDoc::is_valid_epub(&corrupted_file);
2604
2605        assert!(result.is_err());
2606        let err = result.unwrap_err();
2607        assert!(matches!(err, EpubError::ArchiveError { .. }));
2608
2609        std::fs::remove_file(corrupted_file).ok();
2610    }
2611
2612    #[test]
2613    fn test_is_valid_epub_valid_epub_3() {
2614        let result = EpubDoc::is_valid_epub("./test_case/epub-33.epub");
2615        assert!(result.is_ok());
2616        assert_eq!(result.unwrap(), true);
2617    }
2618
2619    #[test]
2620    fn test_is_outside_error() {
2621        let archive_error = EpubError::ArchiveError {
2622            source: zip::result::ZipError::Io(std::io::Error::new(
2623                std::io::ErrorKind::Other,
2624                "test",
2625            )),
2626        };
2627        assert!(EpubDoc::<BufReader<File>>::is_outside_error(&archive_error));
2628
2629        let io_error = EpubError::IOError {
2630            source: std::io::Error::new(std::io::ErrorKind::NotFound, "test"),
2631        };
2632        assert!(EpubDoc::<BufReader<File>>::is_outside_error(&io_error));
2633
2634        let non_canonical = EpubError::NonCanonicalEpub { expected_file: "test".to_string() };
2635        assert!(!EpubDoc::<BufReader<File>>::is_outside_error(
2636            &non_canonical
2637        ));
2638
2639        let missing_attr = EpubError::MissingRequiredAttribute {
2640            tag: "test".to_string(),
2641            attribute: "id".to_string(),
2642        };
2643        assert!(!EpubDoc::<BufReader<File>>::is_outside_error(&missing_attr));
2644    }
2645}