epub/
doc.rs

1//! Manages the epub doc.
2//!
3//! Provides easy methods to navigate through the epub content, cover,
4//! chapters, etc.
5//!
6//! Main references to EPUB specs:
7//! - https://www.w3.org/TR/epub-33
8//! - https://idpf.org/epub/201
9
10use std::cmp::Ordering;
11use std::collections::HashMap;
12use std::fs::File;
13use std::io::BufReader;
14use std::io::{Read, Seek};
15use std::path::{Component, Path, PathBuf};
16use xmlutils::XMLError;
17
18use crate::archive::EpubArchive;
19
20use crate::xmlutils;
21
22#[derive(Debug, thiserror::Error)]
23pub enum DocError {
24    #[error("Archive Error: {0}")]
25    ArchiveError(#[from] crate::archive::ArchiveError),
26    #[error("XML Error: {0}")]
27    XmlError(#[from] crate::xmlutils::XMLError),
28    #[error("I/O Error: {0}")]
29    IOError(#[from] std::io::Error),
30    #[error("Invalid EPub")]
31    InvalidEpub,
32}
33
/// EPUB specification version declared by the package document.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum EpubVersion {
    /// The package declares version `2.0`.
    Version2_0,
    /// The package declares version `3.0`.
    Version3_0,
    /// Any other (or missing) version; carries the raw text that was found.
    Unknown(String),
}
40
41/// Struct that represent a navigation point in a table of content
42#[derive(Clone, Debug, Eq)]
43pub struct NavPoint {
44    /// the title of this navpoint
45    pub label: String,
46    /// the resource path
47    pub content: PathBuf,
48    /// nested navpoints
49    pub children: Vec<NavPoint>,
50    /// the order in the toc
51    pub play_order: Option<usize>,
52}
53
54impl Ord for NavPoint {
55    fn cmp(&self, other: &Self) -> Ordering {
56        self.play_order.cmp(&other.play_order)
57    }
58}
59
60impl PartialOrd for NavPoint {
61    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
62        Some(self.cmp(other))
63    }
64}
65
66impl PartialEq for NavPoint {
67    fn eq(&self, other: &Self) -> bool {
68        self.play_order == other.play_order
69    }
70}
71
/// An EPUB3 metadata subexpression.
///
/// A refinement is always attached to another metadata expression. The
/// design follows EPUB3, but EPUB2 documents are approximated by turning
/// attributes of the refined element into refinements.
#[derive(Debug, Clone)]
pub struct MetadataRefinement {
    /// Name of the refining property.
    pub property: String,
    /// Value of the refining property.
    pub value: String,
    /// Language of the value, when declared.
    pub lang: Option<String>,
    /// Scheme the value is drawn from, when declared.
    pub scheme: Option<String>,
}
82
83/// An EPUB3 Dublin Core metadata item.
84/// The design follows EPUB3's dcterms element but can draw information both
85/// dcterms and primary `<meta>` expressions.
86///
87/// When facing EPUB2, it also draws information from XHTML1.1 `<meta>`.
88#[derive(Clone, Debug)]
89pub struct MetadataItem {
90    pub(crate) id: Option<String>,
91    pub property: String,
92    pub value: String,
93    pub lang: Option<String>,
94    pub refined: Vec<MetadataRefinement>,
95}
96
97impl MetadataItem {
98    pub fn refinement(&self, property: &str) -> Option<&MetadataRefinement> {
99        self.refined.iter().find(|r| r.property == property)
100    }
101}
102
/// One entry of the epub spine.
#[derive(Debug, Clone)]
pub struct SpineItem {
    /// Id of the resource this entry points to; it is used as a key into
    /// `EpubDoc::resources`.
    pub idref: String,
    /// Optional id of the spine entry itself.
    pub id: Option<String>,
    /// Raw `properties` value of the entry, if any.
    pub properties: Option<String>,
    /// Presumably mirrors the entry's `linear` attribute (whether it is part
    /// of the linear reading order) -- confirm against the spine parser.
    pub linear: bool,
}
110
/// A resource known to the document.
#[derive(Debug, Clone)]
pub struct ResourceItem {
    /// Path used to read the resource from the archive.
    pub path: PathBuf,
    /// Mime-type of the resource.
    pub mime: String,
    /// Whitespace-separated property list (e.g. `cover-image`, `nav`), if any.
    pub properties: Option<String>,
}
117
118/// Struct to control the epub document
119///
120/// The general policy for `EpubDoc` is to support both EPUB2 (commonly used)
121/// and EPUB3 (standard). Considering epub files that have mixed EPUB2 and
122/// EPUB3 features, the implementation of `EpubDoc` isn't strict and rejects
123/// something not in accordance with the specified version only when necessary.
124#[derive(Clone, Debug)]
125pub struct EpubDoc<R: Read + Seek> {
126    /// the zip archive
127    archive: EpubArchive<R>,
128
129    /// The current chapter, is an spine index
130    current: usize,
131
132    /// epub spec version
133    pub version: EpubVersion,
134
135    /// epub spine ids
136    pub spine: Vec<SpineItem>,
137
138    /// resource id -> (path, mime)
139    pub resources: HashMap<String, ResourceItem>,
140
141    /// table of content, list of `NavPoint` in the toc.ncx
142    pub toc: Vec<NavPoint>,
143
144    /// title of toc
145    pub toc_title: String,
146
147    /// The epub metadata.
148    ///
149    /// # Examples
150    ///
151    /// ```
152    /// # use epub::doc::EpubDoc;
153    /// # let doc = EpubDoc::new("test.epub");
154    /// # let doc = doc.unwrap();
155    /// let title = doc.metadata.iter().find(|d| d.property == "title");
156    /// assert_eq!(title.unwrap().value, "Todo es mío");
157    /// ```
158    ///
159    /// See `mdata(property)` for a convenient method returning the first matching item.
160    pub metadata: Vec<MetadataItem>,
161
162    /// root file base path
163    pub root_base: PathBuf,
164
165    /// root file full path
166    pub root_file: PathBuf,
167
168    /// Custom css list to inject in every xhtml file
169    pub extra_css: Vec<String>,
170
171    /// unique identifier
172    pub unique_identifier: Option<String>,
173}
174
/// An `EpubDoc` used for testing purposes.
#[cfg(feature = "mock")]
impl EpubDoc<std::io::Cursor<Vec<u8>>> {
    /// Builds an empty document on top of an in-memory empty zip archive.
    ///
    /// # Errors
    ///
    /// Returns an error if the archive cannot be created from the empty zip.
    pub fn mock() -> Result<Self, DocError> {
        // An empty zip file is a 22-byte record: the 4-byte signature below
        // followed by 18 zero bytes. It is just enough to create an archive.
        const SIGNATURE: [u8; 4] = [0x50, 0x4b, 0x05, 0x06];
        let mut data = vec![0u8; 22];
        data[..SIGNATURE.len()].copy_from_slice(&SIGNATURE);

        let archive = EpubArchive::from_reader(std::io::Cursor::new(data))?;
        Ok(Self {
            archive,
            current: 0,
            version: EpubVersion::Version2_0,
            spine: Vec::new(),
            resources: HashMap::new(),
            toc: Vec::new(),
            toc_title: String::new(),
            metadata: Vec::new(),
            root_base: PathBuf::new(),
            root_file: PathBuf::new(),
            extra_css: Vec::new(),
            unique_identifier: None,
        })
    }
}
202
203impl EpubDoc<BufReader<File>> {
204    /// Opens the epub file in `path`.
205    ///
206    /// Initialize some internal variables to be able to access to the epub
207    /// spine definition and to navigate through the epub.
208    ///
209    /// # Examples
210    ///
211    /// ```
212    /// use epub::doc::EpubDoc;
213    ///
214    /// let doc = EpubDoc::new("test.epub");
215    /// assert!(doc.is_ok());
216    /// ```
217    ///
218    /// # Errors
219    ///
220    /// Returns an error if the epub is broken or if the file doesn't
221    /// exists.
222    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, DocError> {
223        let path = path.as_ref();
224        let file = File::open(path)?;
225        let mut doc = Self::from_reader(BufReader::new(file))?;
226        doc.archive.path = path.to_path_buf();
227        Ok(doc)
228    }
229}
230
231impl<R: Read + Seek> EpubDoc<R> {
232    /// Opens the epub contained in `reader`.
233    ///
234    /// Initialize some internal variables to be able to access to the epub
235    /// spine definition and to navigate through the epub.
236    ///
237    /// # Examples
238    ///
239    /// ```
240    /// use epub::doc::EpubDoc;
241    /// use std::fs::File;
242    /// use std::io::{Cursor, Read};
243    ///
244    /// let mut file = File::open("test.epub").unwrap();
245    /// let mut buffer = Vec::new();
246    /// file.read_to_end(&mut buffer).unwrap();
247    ///
248    /// let cursor = Cursor::new(buffer);
249    ///
250    /// let doc = EpubDoc::from_reader(cursor);
251    /// assert!(doc.is_ok());
252    /// ```
253    ///
254    /// # Errors
255    ///
256    /// Returns an error if the epub is broken.
257    pub fn from_reader(reader: R) -> Result<Self, DocError> {
258        let mut archive = EpubArchive::from_reader(reader)?;
259
260        let container = archive.get_container_file()?;
261        let root_file = get_root_file(&container)?;
262        let base_path = root_file.parent().expect("All files have a parent");
263        let mut doc = Self {
264            archive,
265            version: EpubVersion::Version2_0,
266            spine: vec![],
267            toc: vec![],
268            toc_title: String::new(),
269            resources: HashMap::new(),
270            metadata: Vec::new(),
271            root_file: root_file.clone(),
272            root_base: base_path.to_path_buf(),
273            current: 0,
274            extra_css: vec![],
275            unique_identifier: None,
276        };
277        doc.fill_resources()?;
278        Ok(doc)
279    }
280
281    /// Returns the first metadata found with this property name.
282    ///
283    /// # Examples
284    ///
285    /// ```
286    /// # use epub::doc::EpubDoc;
287    /// # let doc = EpubDoc::new("test.epub");
288    /// # let doc = doc.unwrap();
289    /// let language = doc.mdata("language");
290    /// assert_eq!(language.unwrap().value, "es");
291    pub fn mdata(&self, property: &str) -> Option<&MetadataItem> {
292        self.metadata.iter().find(|data| data.property == property)
293    }
294
295    /// Returns the title.
296    ///
297    /// An EPUB file may provide multiple titles. This method only returns the
298    /// primary one. Access `metadata` directly to gain more control.
299    pub fn get_title(&self) -> Option<String> {
300        self.mdata("title").map(|item| item.value.clone())
301    }
302
303    /// Returns the id of the epub cover.
304    ///
305    /// # Examples
306    ///
307    /// ```rust
308    /// use epub::doc::EpubDoc;
309    ///
310    /// let doc = EpubDoc::new("test.epub");
311    /// assert!(doc.is_ok());
312    /// let mut doc = doc.unwrap();
313    ///
314    /// let cover_id = doc.get_cover_id();
315    /// ```
316    ///
317    /// This returns the cover id, which can be used to get the cover data.
318    /// The id is not guaranteed to be valid.
319    pub fn get_cover_id(&self) -> Option<String> {
320        match self.version {
321            // EPUB3 requires zero or one cover-image resource
322            EpubVersion::Version3_0 => self.resources.iter().find_map(|(id, resource)| {
323                resource
324                    .properties
325                    .as_ref()
326                    .and_then(|ps| ps.split_ascii_whitespace().find(|p| *p == "cover-image"))
327                    .map(|_| id.clone())
328            }),
329            // EPUB2 doesn't include cover identification, but a common practice is `<meta name="cover">`
330            _ => self.mdata("cover").map(|item| item.value.clone()),
331        }
332    }
333
334    /// Returns the id of the navigation document (EPUB3 only).
335    ///
336    /// **Relationship with `toc`**:
337    /// "Navigation document" is a concept formalized in EPUB3, superseding NCX
338    /// format used in EPUB2. NCX is required in EPUB2 and not EPUB3, though
339    /// some authors provide both in the archive. `self.toc` (parsed from NCX)
340    /// and this are independent on each other.
341    pub fn get_nav_id(&self) -> Option<String> {
342        match self.version {
343            // EPUB3 requires exactly one nav resource
344            EpubVersion::Version3_0 => self.resources.iter().find_map(|(id, resource)| {
345                resource
346                    .properties
347                    .as_ref()
348                    .and_then(|ps| ps.split_ascii_whitespace().find(|p| *p == "nav"))
349                    .map(|_| id.clone())
350            }),
351            // The concept of navigation document doesn't exist in EPUB2.
352            _ => None,
353        }
354    }
355
356    /// Returns the cover's content and mime-type
357    ///
358    /// # Examples
359    ///
360    /// ```rust,ignore
361    /// use std::fs;
362    /// use std::io::Write;
363    /// use epub::doc::EpubDoc;
364    ///
365    /// let doc = EpubDoc::new("test.epub");
366    /// assert!(doc.is_ok());
367    /// let mut doc = doc.unwrap();
368    ///
369    /// let cover_data = doc.get_cover().unwrap();
370    ///
371    /// let f = fs::File::create("/tmp/cover.png");
372    /// assert!(f.is_ok());
373    /// let mut f = f.unwrap();
374    /// let resp = f.write_all(&cover_data);
375    /// ```
376    ///
377    /// Returns [`None`] if the cover can't be found.
378    pub fn get_cover(&mut self) -> Option<(Vec<u8>, String)> {
379        let cover_id = self.get_cover_id();
380        cover_id.and_then(|cid| self.get_resource(&cid))
381    }
382
383    /// Returns Release Identifier defined at
384    /// https://www.w3.org/publishing/epub32/epub-packages.html#sec-metadata-elem-identifiers-pid
385    pub fn get_release_identifier(&self) -> Option<String> {
386        match (
387            self.unique_identifier.as_ref(),
388            self.mdata("dcterms:modified"),
389        ) {
390            (Some(unique_identifier), Some(modified)) => {
391                Some(format!("{}@{}", unique_identifier, modified.value))
392            }
393            _ => None,
394        }
395    }
396
397    /// Returns the resource content by full path in the epub archive
398    ///
399    /// Returns [`None`] if the path doesn't exist in the epub
400    pub fn get_resource_by_path<P: AsRef<Path>>(&mut self, path: P) -> Option<Vec<u8>> {
401        self.archive.get_entry(path).ok()
402    }
403
404    /// Returns the resource content and mime-type by the id defined in the spine
405    ///
406    /// Returns [`None`] if the id doesn't exists in the epub
407    pub fn get_resource(&mut self, id: &str) -> Option<(Vec<u8>, String)> {
408        let ResourceItem { path, mime, .. } = self.resources.get(id)?;
409        let path = path.clone();
410        let mime = mime.clone();
411        let content = self.get_resource_by_path(&path)?;
412        Some((content, mime))
413    }
414
415    /// Returns the resource content by full path in the epub archive, as String
416    ///
417    /// Returns [`None`] if the path doesn't exists in the epub
418    pub fn get_resource_str_by_path<P: AsRef<Path>>(&mut self, path: P) -> Option<String> {
419        self.archive.get_entry_as_str(path).ok()
420    }
421
422    /// Returns the resource content and mime-type by the id defined in the spine, as String
423    ///
424    /// Returns [`None`] if the id doesn't exists in the epub
425    pub fn get_resource_str(&mut self, id: &str) -> Option<(String, String)> {
426        let ResourceItem { path, mime, .. } = self.resources.get(id)?;
427        let mime = mime.clone();
428        let path = path.clone();
429        let content = self.get_resource_str_by_path(path)?;
430        Some((content, mime))
431    }
432
433    /// Returns the resource mime-type
434    ///
435    /// # Examples
436    ///
437    /// ```
438    /// # use epub::doc::EpubDoc;
439    /// # let doc = EpubDoc::new("test.epub");
440    /// # let doc = doc.unwrap();
441    /// let mime = doc.get_resource_mime("portada.png");
442    /// assert_eq!("image/png", mime.unwrap());
443    /// ```
444    ///
445    /// Returns [`None`] the resource can't be found.
446    pub fn get_resource_mime(&self, id: &str) -> Option<String> {
447        self.resources.get(id).map(|r| r.mime.clone())
448    }
449
450    /// Returns the resource mime searching by source full path
451    ///
452    /// # Examples
453    ///
454    /// ```
455    /// # use epub::doc::EpubDoc;
456    /// # let doc = EpubDoc::new("test.epub");
457    /// # let doc = doc.unwrap();
458    /// let mime = doc.get_resource_mime_by_path("OEBPS/Images/portada.png");
459    /// assert_eq!("image/png", mime.unwrap());
460    /// ```
461    ///
462    /// Returns [`None`] the resource can't be found.
463    pub fn get_resource_mime_by_path<P: AsRef<Path>>(&self, path: P) -> Option<String> {
464        let path = path.as_ref();
465
466        self.resources.iter().find_map(|(_, r)| {
467            if r.path == path {
468                Some(r.mime.clone())
469            } else {
470                None
471            }
472        })
473    }
474
475    /// Returns the current chapter content and mime-type
476    ///
477    /// The current follows the epub spine order. You can modify the current
478    /// calling to `go_next`, `go_prev` or `set_current` methods.
479    ///
480    /// Can return [`None`] if the epub is broken.
481    pub fn get_current(&mut self) -> Option<(Vec<u8>, String)> {
482        let current_id = self.get_current_id()?;
483        self.get_resource(&current_id)
484    }
485
486    /// See [`Self::get_current`]
487    pub fn get_current_str(&mut self) -> Option<(String, String)> {
488        let current_id = self.get_current_id()?;
489        self.get_resource_str(&current_id)
490    }
491
492    /// Returns the current chapter data, with resource uris renamed so they
493    /// have the epub:// prefix and all are relative to the root file
494    ///
495    /// This method is useful to render the content with a html engine, because inside the epub
496    /// local paths are relatives, so you can provide that content, because the engine will look
497    /// for the relative path in the filesystem and that file isn't there. You should provide files
498    /// with epub:// using [`Self::get_resource_by_path`]
499    ///
500    /// # Examples
501    ///
502    /// ```
503    /// # use epub::doc::EpubDoc;
504    /// # let mut doc = EpubDoc::new("test.epub").unwrap();
505    /// let current = doc.get_current_with_epub_uris().unwrap();
506    /// let text = String::from_utf8(current).unwrap();
507    /// assert!(text.contains("epub://OEBPS/Images/portada.png"));
508
509    /// doc.go_next();
510    /// let current = doc.get_current_with_epub_uris().unwrap();
511    /// let text = String::from_utf8(current).unwrap();
512    /// assert!(text.contains("epub://OEBPS/Styles/stylesheet.css"));
513    /// assert!(text.contains("http://creativecommons.org/licenses/by-sa/3.0/"));
514    /// ```
515    ///
516    /// # Errors
517    ///
518    /// Returns [`DocError::InvalidEpub`] if the epub is broken.
519    pub fn get_current_with_epub_uris(&mut self) -> Result<Vec<u8>, DocError> {
520        let path = self.get_current_path().ok_or(DocError::InvalidEpub)?;
521        let (current, _mime) = self.get_current().ok_or(DocError::InvalidEpub)?;
522
523        let resp = xmlutils::replace_attrs(
524            current.as_slice(),
525            |element, attr, value| match (element, attr) {
526                ("link", "href") | ("image", "href") | ("a", "href") | ("img", "src") => {
527                    build_epub_uri(&path, value)
528                }
529                _ => String::from(value),
530            },
531            &self.extra_css,
532        );
533
534        resp.map_err(From::from)
535    }
536
537    /// Returns the current chapter mimetype
538    ///
539    /// # Examples
540    ///
541    /// ```
542    /// # use epub::doc::EpubDoc;
543    /// # let doc = EpubDoc::new("test.epub");
544    /// # let doc = doc.unwrap();
545    /// let m = doc.get_current_mime();
546    /// assert_eq!("application/xhtml+xml", m.unwrap());
547    /// ```
548    ///
549    /// Can return [`None`] if the epub is broken.
550    pub fn get_current_mime(&self) -> Option<String> {
551        let current_id = self.get_current_id()?;
552        self.get_resource_mime(&current_id)
553    }
554
555    /// Returns the current chapter full path
556    ///
557    /// # Examples
558    ///
559    /// ```
560    /// # use epub::doc::EpubDoc;
561    /// # use std::path::Path;
562    /// # let doc = EpubDoc::new("test.epub");
563    /// # let doc = doc.unwrap();
564    /// let p = doc.get_current_path();
565    /// assert_eq!(Path::new("OEBPS/Text/titlepage.xhtml"), p.unwrap());
566    /// ```
567    ///
568    /// Can return [`None`] if the epub is broken.
569    pub fn get_current_path(&self) -> Option<PathBuf> {
570        let current_id = self.get_current_id()?;
571        self.resources.get(&current_id).map(|r| r.path.clone())
572    }
573
574    /// Returns the current chapter id
575    ///
576    /// # Examples
577    ///
578    /// ```
579    /// # use epub::doc::EpubDoc;
580    /// # let doc = EpubDoc::new("test.epub");
581    /// # let doc = doc.unwrap();
582    /// let id = doc.get_current_id();
583    /// assert_eq!("titlepage.xhtml", id.unwrap());
584    /// ```
585    ///
586    /// Can return [`None`] if the epub is broken.
587    pub fn get_current_id(&self) -> Option<String> {
588        self.spine.get(self.current).cloned().map(|i| i.idref)
589    }
590
591    /// Changes current to the next chapter
592    ///
593    /// # Examples
594    ///
595    /// ```
596    /// # use epub::doc::EpubDoc;
597    /// # let doc = EpubDoc::new("test.epub");
598    /// # let mut doc = doc.unwrap();
599    /// doc.go_next();
600    /// assert_eq!("000.xhtml", doc.get_current_id().unwrap());
601    ///
602    /// let len = doc.spine.len();
603    /// for i in 1..len {
604    ///     doc.go_next();
605    /// }
606    /// assert!(!doc.go_next());
607    /// ```
608    ///
609    /// Returns [`false`] if the current chapter is the last one
610    pub fn go_next(&mut self) -> bool {
611        if self.current + 1 >= self.spine.len() {
612            false
613        } else {
614            self.current += 1;
615            true
616        }
617    }
618
619    /// Changes current to the prev chapter
620    ///
621    /// # Examples
622    ///
623    /// ```
624    /// # use epub::doc::EpubDoc;
625    /// # let doc = EpubDoc::new("test.epub");
626    /// # let mut doc = doc.unwrap();
627    /// assert!(!doc.go_prev());
628    ///
629    /// doc.go_next(); // 000.xhtml
630    /// doc.go_next(); // 001.xhtml
631    /// doc.go_next(); // 002.xhtml
632    /// doc.go_prev(); // 001.xhtml
633    /// assert_eq!("001.xhtml", doc.get_current_id().unwrap());
634    /// ```
635    ///
636    /// Returns [`false`] if the current chapter is the first one
637    pub fn go_prev(&mut self) -> bool {
638        if self.current < 1 {
639            false
640        } else {
641            self.current -= 1;
642            true
643        }
644    }
645
646    /// Returns the number of chapters
647    ///
648    /// # Examples
649    ///
650    /// ```
651    /// # use epub::doc::EpubDoc;
652    /// # let doc = EpubDoc::new("test.epub");
653    /// # let mut doc = doc.unwrap();
654    /// assert_eq!(17, doc.get_num_chapters());
655    /// ```
656    pub fn get_num_chapters(&self) -> usize {
657        self.spine.len()
658    }
659
660    #[deprecated(note="please use `get_num_chapters` instead")]
661    pub fn get_num_pages(&self) -> usize {
662        self.get_num_chapters()
663    }
664
665    /// Returns the current chapter number, starting from 0
666    pub fn get_current_chapter(&self) -> usize {
667        self.current
668    }
669
670    #[deprecated(note="please use `get_current_chapter` instead")]
671    pub fn get_current_page(&self) -> usize {
672        self.get_current_chapter()
673    }
674
675
676    /// Changes the current chapter
677    ///
678    /// # Examples
679    ///
680    /// ```
681    /// # use epub::doc::EpubDoc;
682    /// # let doc = EpubDoc::new("test.epub");
683    /// # let mut doc = doc.unwrap();
684    /// assert_eq!(0, doc.get_current_chapter());
685    /// doc.set_current_chapter(2);
686    /// assert_eq!("001.xhtml", doc.get_current_id().unwrap());
687    /// assert_eq!(2, doc.get_current_chapter());
688    /// assert!(!doc.set_current_chapter(50));
689    /// ```
690    ///
691    /// Returns [`false`] if the chapter is out of bounds
692    pub fn set_current_chapter(&mut self, n: usize) -> bool {
693        if n >= self.spine.len() {
694            false
695        } else {
696            self.current = n;
697            true
698        }
699    }
700
701    #[deprecated(note="please use `set_current_chapter` instead")]
702    pub fn set_current_page(&mut self, n: usize) -> bool {
703        self.set_current_chapter(n)
704    }
705
706
707    /// This will inject arbitrary css into every queried html page 
708    /// [`Self::get_current_with_epub_uris`]
709    ///
710    /// # Examples
711    ///
712    /// ```
713    /// # use epub::doc::EpubDoc;
714    /// # let doc = EpubDoc::new("test.epub");
715    /// # let mut doc = doc.unwrap();
716    /// # let _ = doc.set_current_chapter(2);
717    /// let extracss = "body { background-color: black; color: white }";
718    /// doc.add_extra_css(extracss);
719    /// let current = doc.get_current_with_epub_uris().unwrap();
720    /// let text = String::from_utf8(current).unwrap();
721    /// assert!(text.contains(extracss));
722    /// ```
723    pub fn add_extra_css(&mut self, css: &str) {
724        self.extra_css.push(String::from(css));
725    }
726
727    /// Function to convert a resource path to a chapter number in the spine
728    /// If the resource isn't in the spine list, None will be returned
729    ///
730    /// This method is useful to convert a toc [`NavPoint`] content to a chapter number
731    /// to be able to navigate easily
732    pub fn resource_uri_to_chapter(&self, uri: &PathBuf) -> Option<usize> {
733        for (k, ResourceItem { path, .. }) in &self.resources {
734            if path == uri {
735                return self.resource_id_to_chapter(k);
736            }
737        }
738
739        None
740    }
741
742    /// Function to convert a resource id to a chapter number in the spine
743    /// If the resourse isn't in the spine list, None will be returned
744    pub fn resource_id_to_chapter(&self, uri: &str) -> Option<usize> {
745        self.spine.iter().position(|item| item.idref == uri)
746    }
747
748    fn fill_resources(&mut self) -> Result<(), DocError> {
749        let container = self.archive.get_entry(&self.root_file)?;
750        let root = xmlutils::XMLReader::parse(container.as_slice())?;
751        self.version = match root.borrow().get_attr("version") {
752            Some(v) if v == "2.0" => EpubVersion::Version2_0,
753            Some(v) if v == "3.0" => EpubVersion::Version3_0,
754            Some(v) => EpubVersion::Unknown(String::from(v)),
755            _ => EpubVersion::Unknown(String::from("Unknown")),
756        };
757        let unique_identifier_id = &root.borrow().get_attr("unique-identifier");
758
759        // resources from manifest
760        // This should be run before everything else, because other functions relies on
761        // self.resources and should be filled before calling `fill_toc`
762        let manifest = root
763            .borrow()
764            .find("manifest")
765            .ok_or(DocError::InvalidEpub)?;
766        for r in &manifest.borrow().children {
767            let item = r.borrow();
768            let _ = self.insert_resource(&item);
769        }
770
771        // items from spine
772        let spine = root.borrow().find("spine").ok_or(DocError::InvalidEpub)?;
773        for r in &spine.borrow().children {
774            let item = r.borrow();
775            let _ = self.insert_spine(&item);
776        }
777
778        // toc.ncx
779        if let Some(toc) = spine.borrow().get_attr("toc") {
780            let _ = self.fill_toc(&toc);
781        }
782
783        // metadata
784        let metadata_elem = root
785            .borrow()
786            .find("metadata")
787            .ok_or(DocError::InvalidEpub)?;
788        self.fill_metadata(&metadata_elem.borrow());
789
790        let identifier = if let Some(uid) = unique_identifier_id {
791            // find identifier with id
792            self.metadata
793                .iter()
794                .find(|d| d.property == "identifier" && d.id.as_ref().is_some_and(|id| id == uid))
795        } else {
796            // fallback with the first identifier.
797            self.metadata.iter().find(|d| d.property == "identifier")
798        };
799        self.unique_identifier = identifier.map(|data| data.value.clone());
800
801        Ok(())
802    }
803
804    fn fill_metadata(&mut self, elem: &xmlutils::XMLNode) {
805        // refinements are inserted here with ID as key, these are later associated to metadata
806        let mut refinements: HashMap<String, Vec<MetadataRefinement>> = HashMap::new();
807        for r in &elem.children {
808            let item = r.borrow();
809            // for each acceptable element, either push a metadata item or push a refinement
810            match (item.name.namespace_ref(), &item.name.local_name) {
811                // dcterms
812                (Some("http://purl.org/dc/elements/1.1/"), name) => {
813                    let id = item.get_attr("id");
814                    let lang = item.get_attr("lang");
815                    let property = name.clone();
816                    let value = item.text.clone().unwrap_or_default();
817
818                    let refined: Vec<MetadataRefinement> =
819                        if let EpubVersion::Version3_0 = self.version {
820                            vec![]
821                        } else {
822                            // treat it as EPUB2 dcterms, storing additional info in attributes
823                            item.attrs
824                                .iter()
825                                .filter_map(|attr| {
826                                    if let Some("http://www.idpf.org/2007/opf") =
827                                        attr.name.namespace_ref()
828                                    {
829                                        let property = attr.name.local_name.clone();
830                                        let value = attr.value.clone();
831                                        Some(MetadataRefinement {
832                                            property,
833                                            value,
834                                            lang: None,
835                                            scheme: None,
836                                        })
837                                    } else {
838                                        None
839                                    }
840                                })
841                                .collect()
842                        };
843                    self.metadata.push(MetadataItem {
844                        id,
845                        property,
846                        value,
847                        lang,
848                        refined,
849                    });
850                }
851
852                // <meta>
853                (Some("http://www.idpf.org/2007/opf"), name)
854                    if name.eq_ignore_ascii_case("meta") =>
855                {
856                    if let Some(property) = item.get_attr("property") {
857                        // EPUB3 <meta>, value in its text content
858                        let value = item.text.clone().unwrap_or_default();
859                        let lang = item.get_attr("lang");
860                        if let Some(refines) = item.get_attr("refines") {
861                            // refinement (subexpression in EPUB3 terminology)
862                            let tid = refines.strip_prefix('#').unwrap_or_else(|| &refines);
863                            let scheme = item.get_attr("scheme");
864                            let refinement = MetadataRefinement {
865                                property,
866                                value,
867                                lang,
868                                scheme,
869                            };
870                            if let Some(refs) = refinements.get_mut(tid) {
871                                refs.push(refinement);
872                            } else {
873                                refinements.insert(tid.to_string(), vec![refinement]);
874                            }
875                        } else {
876                            // primary
877                            let id = item.get_attr("id");
878                            self.metadata.push(MetadataItem {
879                                id,
880                                property,
881                                value,
882                                lang,
883                                refined: vec![],
884                            });
885                        }
886                    } else if let (Some(property), Some(value)) =
887                        (item.get_attr("name"), item.get_attr("content"))
888                    {
889                        // Legacy XHTML1.1 <meta>
890                        self.metadata.push(MetadataItem {
891                            id: None,
892                            property,
893                            value,
894                            lang: None,
895                            refined: vec![],
896                        });
897                    }
898                }
899
900                _ => (),
901            }
902        }
903
904        // associate refinements
905        self.metadata.iter_mut().for_each(|item| {
906            if let Some(id) = &item.id {
907                if let Some(mut refs) = refinements.remove(id) {
908                    item.refined.append(&mut refs);
909                }
910            }
911        });
912    }
913
914    // Forcibly converts separators in a filepath to unix separators to
915    // to ensure that ZipArchive's by_name method will retrieve the proper
916    // file. Failing to convert to unix-style on Windows causes the
917    // ZipArchive not to find the file.
918    fn convert_path_seps<P: AsRef<Path>>(&self, href: P) -> PathBuf {
919        let mut path = self.root_base.join(href);
920        if cfg!(windows) {
921            path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
922        }
923        path
924    }
925
926    fn insert_resource(&mut self, item: &xmlutils::XMLNode) -> Result<(), XMLError> {
927        let id = item
928            .get_attr("id")
929            .ok_or_else(|| XMLError::AttrNotFound("id".into()))?;
930        let href = item
931            .get_attr("href")
932            .ok_or_else(|| XMLError::AttrNotFound("href".into()))?;
933        let mime = item
934            .get_attr("media-type")
935            .ok_or_else(|| XMLError::AttrNotFound("media-type".into()))?;
936        let properties = item.get_attr("properties");
937
938        self.resources.insert(
939            id,
940            ResourceItem {
941                path: self.convert_path_seps(href),
942                mime,
943                properties,
944            },
945        );
946        Ok(())
947    }
948
949    fn insert_spine(&mut self, item: &xmlutils::XMLNode) -> Result<(), DocError> {
950        let idref = item
951            .get_attr("idref")
952            .ok_or_else(|| XMLError::AttrNotFound("idref".into()))?;
953        let linear = item.get_attr("linear").unwrap_or("yes".into()) == "yes";
954        let properties = item.get_attr("properties");
955        let id = item.get_attr("id");
956        self.spine.push(SpineItem {
957            idref,
958            id,
959            linear,
960            properties,
961        });
962        Ok(())
963    }
964
965    fn fill_toc(&mut self, id: &str) -> Result<(), DocError> {
966        let toc_res = self.resources.get(id).ok_or(DocError::InvalidEpub)?; // this should be turned into it's own error type, but
967
968        let container = self.archive.get_entry(&toc_res.path)?;
969        let root = xmlutils::XMLReader::parse(container.as_slice())?;
970
971        self.toc_title = root
972            .borrow()
973            .find("docTitle")
974            .and_then(|dt| {
975                dt.borrow()
976                    .children
977                    .get(0)
978                    .and_then(|t| t.borrow().text.clone())
979            })
980            .unwrap_or_default();
981
982        let mapnode = root
983            .borrow()
984            .find("navMap")
985            .ok_or_else(|| XMLError::AttrNotFound("navMap".into()))?;
986
987        self.toc.append(&mut self.get_navpoints(&mapnode.borrow()));
988        self.toc.sort();
989
990        Ok(())
991    }
992
993    /// Recursively extract all navpoints from a node.
994    fn get_navpoints(&self, parent: &xmlutils::XMLNode) -> Vec<NavPoint> {
995        let mut navpoints = Vec::new();
996
997        // TODO: parse metadata (dtb:totalPageCount, dtb:depth, dtb:maxPageNumber)
998
999        for nav in &parent.children {
1000            let item = nav.borrow();
1001            if item.name.local_name != "navPoint" {
1002                continue;
1003            }
1004            let play_order = item
1005                .get_attr("playOrder")
1006                .and_then(|n| n.parse::<usize>().ok());
1007            let content = item
1008                .find("content")
1009                .and_then(|c| c.borrow().get_attr("src").map(|p| self.root_base.join(p)));
1010
1011            let label = item.find("navLabel").and_then(|l| {
1012                l.borrow()
1013                    .children
1014                    .get(0)
1015                    .and_then(|t| t.borrow().text.clone())
1016            });
1017
1018            if let (order, Some(content_path), Some(label_text)) = (play_order, content, label) {
1019                let navpoint = NavPoint {
1020                    label: label_text.clone(),
1021                    content: content_path.clone(),
1022                    children: self.get_navpoints(&item),
1023                    play_order: order,
1024                };
1025                navpoints.push(navpoint);
1026            }
1027        }
1028
1029        navpoints.sort();
1030        navpoints
1031    }
1032}
1033
1034fn get_root_file(container: &[u8]) -> Result<PathBuf, DocError> {
1035    let root = xmlutils::XMLReader::parse(container)?;
1036    let el = root.borrow();
1037    let element = el
1038        .find("rootfile")
1039        .ok_or_else(|| XMLError::AttrNotFound("rootfile".into()))?;
1040    let el2 = element.borrow();
1041
1042    let attr = el2
1043        .get_attr("full-path")
1044        .ok_or_else(|| XMLError::AttrNotFound("full-path".into()))?;
1045
1046    Ok(PathBuf::from(attr))
1047}
1048
/// Builds the `epub://` URI for a link found in the document located at `path`.
///
/// `append` is the link as written in the document:
///
/// * Absolute `http://` and `https://` links (scheme matched
///   case-insensitively) are external and returned unchanged.
/// * Anything else is resolved against the directory containing `path`:
///   `..` steps one directory up, normal components are appended, and `.`,
///   root and prefix components are ignored.
///
/// The resulting path always uses `/` separators, including on Windows.
fn build_epub_uri<P: AsRef<Path>>(path: P, append: &str) -> String {
    // Allow external links. The full `scheme://` prefix is required so that a
    // local file whose name merely starts with "http" (e.g. `http_intro.xhtml`)
    // is still resolved inside the book. `str::get` keeps the prefix slice
    // safe on multi-byte input.
    let is_external = ["http://", "https://"].iter().any(|scheme| {
        append
            .get(..scheme.len())
            .map_or(false, |prefix| prefix.eq_ignore_ascii_case(scheme))
    });
    if is_external {
        return append.to_owned();
    }

    // Start from the directory that contains the current file.
    let mut cpath = path.as_ref().to_path_buf();
    cpath.pop();

    for component in Path::new(append).components() {
        match component {
            Component::ParentDir => {
                cpath.pop();
            }
            Component::Normal(segment) => cpath.push(segment),
            // These do not contribute to the resolved location.
            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
        }
    }

    // If on Windows, replace all Windows path separators with Unix path separators
    let resolved = cpath.to_string_lossy();
    if cfg!(windows) {
        format!("epub://{}", resolved.replace('\\', "/"))
    } else {
        format!("epub://{}", resolved)
    }
}