epub/doc.rs
1//! Manages the epub doc.
2//!
3//! Provides easy methods to navigate through the epub content, cover,
4//! chapters, etc.
5//!
6//! Main references to EPUB specs:
7//! - https://www.w3.org/TR/epub-33
8//! - https://idpf.org/epub/201
9
10use std::cmp::Ordering;
11use std::collections::HashMap;
12use std::fs::File;
13use std::io::BufReader;
14use std::io::{Read, Seek};
15use std::path::{Component, Path, PathBuf};
16use xmlutils::XMLError;
17
18use crate::archive::EpubArchive;
19
20use crate::xmlutils;
21
22#[derive(Debug, thiserror::Error)]
23pub enum DocError {
24 #[error("Archive Error: {0}")]
25 ArchiveError(#[from] crate::archive::ArchiveError),
26 #[error("XML Error: {0}")]
27 XmlError(#[from] crate::xmlutils::XMLError),
28 #[error("I/O Error: {0}")]
29 IOError(#[from] std::io::Error),
30 #[error("Invalid EPub")]
31 InvalidEpub,
32}
33
/// The EPUB specification version declared by the package document's
/// `version` attribute.
///
/// The variant order is significant: `PartialOrd` is derived from it.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum EpubVersion {
    /// `version="2.0"`
    Version2_0,
    /// `version="3.0"`
    Version3_0,
    /// Any other value; carries the raw attribute text, or `"Unknown"` when
    /// the attribute is absent.
    Unknown(String),
}
40
/// A single entry of the table of contents.
///
/// Equality and ordering of navpoints consider **only** `play_order`; the
/// label, target and children do not take part in comparisons.
#[derive(Debug, Clone, Eq)]
pub struct NavPoint {
    /// Human readable title of the entry.
    pub label: String,
    /// Path of the resource this entry points to.
    pub content: PathBuf,
    /// Entries nested below this one.
    pub children: Vec<NavPoint>,
    /// Position in the table of contents, when the source declares one.
    pub play_order: Option<usize>,
}

impl Ord for NavPoint {
    /// Orders by `play_order`; entries without one sort before those with one.
    fn cmp(&self, other: &Self) -> Ordering {
        Ord::cmp(&self.play_order, &other.play_order)
    }
}

impl PartialOrd for NavPoint {
    /// Total order, delegating to [`Ord::cmp`].
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl PartialEq for NavPoint {
    /// Two navpoints are equal when their `play_order` matches.
    fn eq(&self, other: &Self) -> bool {
        matches!(self.cmp(other), Ordering::Equal)
    }
}
71
/// An EPUB3 metadata subexpression, i.e. a statement that refines another
/// metadata expression.
///
/// The shape follows EPUB3; for EPUB2 it is approximated from attributes of
/// the refined element.
#[derive(Debug, Clone)]
pub struct MetadataRefinement {
    /// Name of the refining property.
    pub property: String,
    /// Value of the refinement.
    pub value: String,
    /// Language of the value, when declared.
    pub lang: Option<String>,
    /// Scheme the value is drawn from, when declared.
    pub scheme: Option<String>,
}

/// An EPUB3 Dublin Core metadata item.
///
/// The shape follows EPUB3's dcterms elements, but items are built both from
/// dcterms elements and from primary `<meta>` expressions. For EPUB2 the
/// legacy XHTML1.1 `<meta>` form is accepted as well.
#[derive(Debug, Clone)]
pub struct MetadataItem {
    /// The element's `id`, used to attach refinements to it.
    pub(crate) id: Option<String>,
    /// Name of the property (e.g. `title`).
    pub property: String,
    /// Value of the property.
    pub value: String,
    /// Language of the value, when declared.
    pub lang: Option<String>,
    /// Refinements associated with this item.
    pub refined: Vec<MetadataRefinement>,
}

impl MetadataItem {
    /// Returns the first refinement whose property name equals `property`,
    /// or `None` when this item has no such refinement.
    pub fn refinement(&self, property: &str) -> Option<&MetadataRefinement> {
        for candidate in &self.refined {
            if candidate.property == property {
                return Some(candidate);
            }
        }
        None
    }
}
102
/// One `itemref` entry of the package spine.
#[derive(Debug, Clone)]
pub struct SpineItem {
    /// Id of the manifest resource this entry refers to.
    pub idref: String,
    /// The entry's own `id` attribute, if any.
    pub id: Option<String>,
    /// Raw `properties` attribute, if any.
    pub properties: Option<String>,
    /// `true` unless the entry carries a `linear` attribute other than `"yes"`.
    pub linear: bool,
}
110
/// One resource declared in the package manifest.
#[derive(Debug, Clone)]
pub struct ResourceItem {
    /// Path of the resource, joined onto the root file's base directory.
    pub path: PathBuf,
    /// The declared `media-type`.
    pub mime: String,
    /// Raw `properties` attribute, if any.
    pub properties: Option<String>,
}
117
118/// Struct to control the epub document
119///
120/// The general policy for `EpubDoc` is to support both EPUB2 (commonly used)
121/// and EPUB3 (standard). Considering epub files that have mixed EPUB2 and
122/// EPUB3 features, the implementation of `EpubDoc` isn't strict and rejects
123/// something not in accordance with the specified version only when necessary.
124#[derive(Clone, Debug)]
125pub struct EpubDoc<R: Read + Seek> {
126 /// the zip archive
127 archive: EpubArchive<R>,
128
129 /// The current chapter, is an spine index
130 current: usize,
131
132 /// epub spec version
133 pub version: EpubVersion,
134
135 /// epub spine ids
136 pub spine: Vec<SpineItem>,
137
138 /// resource id -> (path, mime)
139 pub resources: HashMap<String, ResourceItem>,
140
141 /// table of content, list of `NavPoint` in the toc.ncx
142 pub toc: Vec<NavPoint>,
143
144 /// title of toc
145 pub toc_title: String,
146
147 /// The epub metadata.
148 ///
149 /// # Examples
150 ///
151 /// ```
152 /// # use epub::doc::EpubDoc;
153 /// # let doc = EpubDoc::new("test.epub");
154 /// # let doc = doc.unwrap();
155 /// let title = doc.metadata.iter().find(|d| d.property == "title");
156 /// assert_eq!(title.unwrap().value, "Todo es mío");
157 /// ```
158 ///
159 /// See `mdata(property)` for a convenient method returning the first matching item.
160 pub metadata: Vec<MetadataItem>,
161
162 /// root file base path
163 pub root_base: PathBuf,
164
165 /// root file full path
166 pub root_file: PathBuf,
167
168 /// Custom css list to inject in every xhtml file
169 pub extra_css: Vec<String>,
170
171 /// unique identifier
172 pub unique_identifier: Option<String>,
173}
174
/// A EpubDoc used for testing purposes
#[cfg(feature = "mock")]
impl EpubDoc<std::io::Cursor<Vec<u8>>> {
    /// Builds an empty document backed by an in-memory archive without entries.
    ///
    /// # Errors
    ///
    /// Propagates the archive error if the in-memory archive cannot be created.
    pub fn mock() -> Result<Self, DocError> {
        // Binary for an empty zip file so that the archive can be created:
        // the four signature bytes followed by eighteen zero bytes.
        let mut empty_zip = vec![0u8; 22];
        empty_zip[..4].copy_from_slice(&[0x50, 0x4b, 0x05, 0x06]);

        let archive = EpubArchive::from_reader(std::io::Cursor::new(empty_zip))?;
        let doc = Self {
            archive,
            current: 0,
            version: EpubVersion::Version2_0,
            spine: Vec::new(),
            resources: HashMap::new(),
            toc: Vec::new(),
            toc_title: String::new(),
            metadata: Vec::new(),
            root_base: PathBuf::new(),
            root_file: PathBuf::new(),
            extra_css: Vec::new(),
            unique_identifier: None,
        };
        Ok(doc)
    }
}
202
203impl EpubDoc<BufReader<File>> {
204 /// Opens the epub file in `path`.
205 ///
206 /// Initialize some internal variables to be able to access to the epub
207 /// spine definition and to navigate through the epub.
208 ///
209 /// # Examples
210 ///
211 /// ```
212 /// use epub::doc::EpubDoc;
213 ///
214 /// let doc = EpubDoc::new("test.epub");
215 /// assert!(doc.is_ok());
216 /// ```
217 ///
218 /// # Errors
219 ///
220 /// Returns an error if the epub is broken or if the file doesn't
221 /// exists.
222 pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, DocError> {
223 let path = path.as_ref();
224 let file = File::open(path)?;
225 let mut doc = Self::from_reader(BufReader::new(file))?;
226 doc.archive.path = path.to_path_buf();
227 Ok(doc)
228 }
229}
230
231impl<R: Read + Seek> EpubDoc<R> {
232 /// Opens the epub contained in `reader`.
233 ///
234 /// Initialize some internal variables to be able to access to the epub
235 /// spine definition and to navigate through the epub.
236 ///
237 /// # Examples
238 ///
239 /// ```
240 /// use epub::doc::EpubDoc;
241 /// use std::fs::File;
242 /// use std::io::{Cursor, Read};
243 ///
244 /// let mut file = File::open("test.epub").unwrap();
245 /// let mut buffer = Vec::new();
246 /// file.read_to_end(&mut buffer).unwrap();
247 ///
248 /// let cursor = Cursor::new(buffer);
249 ///
250 /// let doc = EpubDoc::from_reader(cursor);
251 /// assert!(doc.is_ok());
252 /// ```
253 ///
254 /// # Errors
255 ///
256 /// Returns an error if the epub is broken.
257 pub fn from_reader(reader: R) -> Result<Self, DocError> {
258 let mut archive = EpubArchive::from_reader(reader)?;
259
260 let container = archive.get_container_file()?;
261 let root_file = get_root_file(&container)?;
262 let base_path = root_file.parent().expect("All files have a parent");
263 let mut doc = Self {
264 archive,
265 version: EpubVersion::Version2_0,
266 spine: vec![],
267 toc: vec![],
268 toc_title: String::new(),
269 resources: HashMap::new(),
270 metadata: Vec::new(),
271 root_file: root_file.clone(),
272 root_base: base_path.to_path_buf(),
273 current: 0,
274 extra_css: vec![],
275 unique_identifier: None,
276 };
277 doc.fill_resources()?;
278 Ok(doc)
279 }
280
281 /// Returns the first metadata found with this property name.
282 ///
283 /// # Examples
284 ///
285 /// ```
286 /// # use epub::doc::EpubDoc;
287 /// # let doc = EpubDoc::new("test.epub");
288 /// # let doc = doc.unwrap();
289 /// let language = doc.mdata("language");
290 /// assert_eq!(language.unwrap().value, "es");
291 pub fn mdata(&self, property: &str) -> Option<&MetadataItem> {
292 self.metadata.iter().find(|data| data.property == property)
293 }
294
295 /// Returns the title.
296 ///
297 /// An EPUB file may provide multiple titles. This method only returns the
298 /// primary one. Access `metadata` directly to gain more control.
299 pub fn get_title(&self) -> Option<String> {
300 self.mdata("title").map(|item| item.value.clone())
301 }
302
303 /// Returns the id of the epub cover.
304 ///
305 /// # Examples
306 ///
307 /// ```rust
308 /// use epub::doc::EpubDoc;
309 ///
310 /// let doc = EpubDoc::new("test.epub");
311 /// assert!(doc.is_ok());
312 /// let mut doc = doc.unwrap();
313 ///
314 /// let cover_id = doc.get_cover_id();
315 /// ```
316 ///
317 /// This returns the cover id, which can be used to get the cover data.
318 /// The id is not guaranteed to be valid.
319 pub fn get_cover_id(&self) -> Option<String> {
320 match self.version {
321 // EPUB3 requires zero or one cover-image resource
322 EpubVersion::Version3_0 => self.resources.iter().find_map(|(id, resource)| {
323 resource
324 .properties
325 .as_ref()
326 .and_then(|ps| ps.split_ascii_whitespace().find(|p| *p == "cover-image"))
327 .map(|_| id.clone())
328 }),
329 // EPUB2 doesn't include cover identification, but a common practice is `<meta name="cover">`
330 _ => self.mdata("cover").map(|item| item.value.clone()),
331 }
332 }
333
334 /// Returns the id of the navigation document (EPUB3 only).
335 ///
336 /// **Relationship with `toc`**:
337 /// "Navigation document" is a concept formalized in EPUB3, superseding NCX
338 /// format used in EPUB2. NCX is required in EPUB2 and not EPUB3, though
339 /// some authors provide both in the archive. `self.toc` (parsed from NCX)
340 /// and this are independent on each other.
341 pub fn get_nav_id(&self) -> Option<String> {
342 match self.version {
343 // EPUB3 requires exactly one nav resource
344 EpubVersion::Version3_0 => self.resources.iter().find_map(|(id, resource)| {
345 resource
346 .properties
347 .as_ref()
348 .and_then(|ps| ps.split_ascii_whitespace().find(|p| *p == "nav"))
349 .map(|_| id.clone())
350 }),
351 // The concept of navigation document doesn't exist in EPUB2.
352 _ => None,
353 }
354 }
355
356 /// Returns the cover's content and mime-type
357 ///
358 /// # Examples
359 ///
360 /// ```rust,ignore
361 /// use std::fs;
362 /// use std::io::Write;
363 /// use epub::doc::EpubDoc;
364 ///
365 /// let doc = EpubDoc::new("test.epub");
366 /// assert!(doc.is_ok());
367 /// let mut doc = doc.unwrap();
368 ///
369 /// let cover_data = doc.get_cover().unwrap();
370 ///
371 /// let f = fs::File::create("/tmp/cover.png");
372 /// assert!(f.is_ok());
373 /// let mut f = f.unwrap();
374 /// let resp = f.write_all(&cover_data);
375 /// ```
376 ///
377 /// Returns [`None`] if the cover can't be found.
378 pub fn get_cover(&mut self) -> Option<(Vec<u8>, String)> {
379 let cover_id = self.get_cover_id();
380 cover_id.and_then(|cid| self.get_resource(&cid))
381 }
382
383 /// Returns Release Identifier defined at
384 /// https://www.w3.org/publishing/epub32/epub-packages.html#sec-metadata-elem-identifiers-pid
385 pub fn get_release_identifier(&self) -> Option<String> {
386 match (
387 self.unique_identifier.as_ref(),
388 self.mdata("dcterms:modified"),
389 ) {
390 (Some(unique_identifier), Some(modified)) => {
391 Some(format!("{}@{}", unique_identifier, modified.value))
392 }
393 _ => None,
394 }
395 }
396
397 /// Returns the resource content by full path in the epub archive
398 ///
399 /// Returns [`None`] if the path doesn't exist in the epub
400 pub fn get_resource_by_path<P: AsRef<Path>>(&mut self, path: P) -> Option<Vec<u8>> {
401 self.archive.get_entry(path).ok()
402 }
403
404 /// Returns the resource content and mime-type by the id defined in the spine
405 ///
406 /// Returns [`None`] if the id doesn't exists in the epub
407 pub fn get_resource(&mut self, id: &str) -> Option<(Vec<u8>, String)> {
408 let ResourceItem { path, mime, .. } = self.resources.get(id)?;
409 let path = path.clone();
410 let mime = mime.clone();
411 let content = self.get_resource_by_path(&path)?;
412 Some((content, mime))
413 }
414
415 /// Returns the resource content by full path in the epub archive, as String
416 ///
417 /// Returns [`None`] if the path doesn't exists in the epub
418 pub fn get_resource_str_by_path<P: AsRef<Path>>(&mut self, path: P) -> Option<String> {
419 self.archive.get_entry_as_str(path).ok()
420 }
421
422 /// Returns the resource content and mime-type by the id defined in the spine, as String
423 ///
424 /// Returns [`None`] if the id doesn't exists in the epub
425 pub fn get_resource_str(&mut self, id: &str) -> Option<(String, String)> {
426 let ResourceItem { path, mime, .. } = self.resources.get(id)?;
427 let mime = mime.clone();
428 let path = path.clone();
429 let content = self.get_resource_str_by_path(path)?;
430 Some((content, mime))
431 }
432
433 /// Returns the resource mime-type
434 ///
435 /// # Examples
436 ///
437 /// ```
438 /// # use epub::doc::EpubDoc;
439 /// # let doc = EpubDoc::new("test.epub");
440 /// # let doc = doc.unwrap();
441 /// let mime = doc.get_resource_mime("portada.png");
442 /// assert_eq!("image/png", mime.unwrap());
443 /// ```
444 ///
445 /// Returns [`None`] the resource can't be found.
446 pub fn get_resource_mime(&self, id: &str) -> Option<String> {
447 self.resources.get(id).map(|r| r.mime.clone())
448 }
449
450 /// Returns the resource mime searching by source full path
451 ///
452 /// # Examples
453 ///
454 /// ```
455 /// # use epub::doc::EpubDoc;
456 /// # let doc = EpubDoc::new("test.epub");
457 /// # let doc = doc.unwrap();
458 /// let mime = doc.get_resource_mime_by_path("OEBPS/Images/portada.png");
459 /// assert_eq!("image/png", mime.unwrap());
460 /// ```
461 ///
462 /// Returns [`None`] the resource can't be found.
463 pub fn get_resource_mime_by_path<P: AsRef<Path>>(&self, path: P) -> Option<String> {
464 let path = path.as_ref();
465
466 self.resources.iter().find_map(|(_, r)| {
467 if r.path == path {
468 Some(r.mime.clone())
469 } else {
470 None
471 }
472 })
473 }
474
475 /// Returns the current chapter content and mime-type
476 ///
477 /// The current follows the epub spine order. You can modify the current
478 /// calling to `go_next`, `go_prev` or `set_current` methods.
479 ///
480 /// Can return [`None`] if the epub is broken.
481 pub fn get_current(&mut self) -> Option<(Vec<u8>, String)> {
482 let current_id = self.get_current_id()?;
483 self.get_resource(¤t_id)
484 }
485
486 /// See [`Self::get_current`]
487 pub fn get_current_str(&mut self) -> Option<(String, String)> {
488 let current_id = self.get_current_id()?;
489 self.get_resource_str(¤t_id)
490 }
491
492 /// Returns the current chapter data, with resource uris renamed so they
493 /// have the epub:// prefix and all are relative to the root file
494 ///
495 /// This method is useful to render the content with a html engine, because inside the epub
496 /// local paths are relatives, so you can provide that content, because the engine will look
497 /// for the relative path in the filesystem and that file isn't there. You should provide files
498 /// with epub:// using [`Self::get_resource_by_path`]
499 ///
500 /// # Examples
501 ///
502 /// ```
503 /// # use epub::doc::EpubDoc;
504 /// # let mut doc = EpubDoc::new("test.epub").unwrap();
505 /// let current = doc.get_current_with_epub_uris().unwrap();
506 /// let text = String::from_utf8(current).unwrap();
507 /// assert!(text.contains("epub://OEBPS/Images/portada.png"));
508
509 /// doc.go_next();
510 /// let current = doc.get_current_with_epub_uris().unwrap();
511 /// let text = String::from_utf8(current).unwrap();
512 /// assert!(text.contains("epub://OEBPS/Styles/stylesheet.css"));
513 /// assert!(text.contains("http://creativecommons.org/licenses/by-sa/3.0/"));
514 /// ```
515 ///
516 /// # Errors
517 ///
518 /// Returns [`DocError::InvalidEpub`] if the epub is broken.
519 pub fn get_current_with_epub_uris(&mut self) -> Result<Vec<u8>, DocError> {
520 let path = self.get_current_path().ok_or(DocError::InvalidEpub)?;
521 let (current, _mime) = self.get_current().ok_or(DocError::InvalidEpub)?;
522
523 let resp = xmlutils::replace_attrs(
524 current.as_slice(),
525 |element, attr, value| match (element, attr) {
526 ("link", "href") | ("image", "href") | ("a", "href") | ("img", "src") => {
527 build_epub_uri(&path, value)
528 }
529 _ => String::from(value),
530 },
531 &self.extra_css,
532 );
533
534 resp.map_err(From::from)
535 }
536
537 /// Returns the current chapter mimetype
538 ///
539 /// # Examples
540 ///
541 /// ```
542 /// # use epub::doc::EpubDoc;
543 /// # let doc = EpubDoc::new("test.epub");
544 /// # let doc = doc.unwrap();
545 /// let m = doc.get_current_mime();
546 /// assert_eq!("application/xhtml+xml", m.unwrap());
547 /// ```
548 ///
549 /// Can return [`None`] if the epub is broken.
550 pub fn get_current_mime(&self) -> Option<String> {
551 let current_id = self.get_current_id()?;
552 self.get_resource_mime(¤t_id)
553 }
554
555 /// Returns the current chapter full path
556 ///
557 /// # Examples
558 ///
559 /// ```
560 /// # use epub::doc::EpubDoc;
561 /// # use std::path::Path;
562 /// # let doc = EpubDoc::new("test.epub");
563 /// # let doc = doc.unwrap();
564 /// let p = doc.get_current_path();
565 /// assert_eq!(Path::new("OEBPS/Text/titlepage.xhtml"), p.unwrap());
566 /// ```
567 ///
568 /// Can return [`None`] if the epub is broken.
569 pub fn get_current_path(&self) -> Option<PathBuf> {
570 let current_id = self.get_current_id()?;
571 self.resources.get(¤t_id).map(|r| r.path.clone())
572 }
573
574 /// Returns the current chapter id
575 ///
576 /// # Examples
577 ///
578 /// ```
579 /// # use epub::doc::EpubDoc;
580 /// # let doc = EpubDoc::new("test.epub");
581 /// # let doc = doc.unwrap();
582 /// let id = doc.get_current_id();
583 /// assert_eq!("titlepage.xhtml", id.unwrap());
584 /// ```
585 ///
586 /// Can return [`None`] if the epub is broken.
587 pub fn get_current_id(&self) -> Option<String> {
588 self.spine.get(self.current).cloned().map(|i| i.idref)
589 }
590
591 /// Changes current to the next chapter
592 ///
593 /// # Examples
594 ///
595 /// ```
596 /// # use epub::doc::EpubDoc;
597 /// # let doc = EpubDoc::new("test.epub");
598 /// # let mut doc = doc.unwrap();
599 /// doc.go_next();
600 /// assert_eq!("000.xhtml", doc.get_current_id().unwrap());
601 ///
602 /// let len = doc.spine.len();
603 /// for i in 1..len {
604 /// doc.go_next();
605 /// }
606 /// assert!(!doc.go_next());
607 /// ```
608 ///
609 /// Returns [`false`] if the current chapter is the last one
610 pub fn go_next(&mut self) -> bool {
611 if self.current + 1 >= self.spine.len() {
612 false
613 } else {
614 self.current += 1;
615 true
616 }
617 }
618
619 /// Changes current to the prev chapter
620 ///
621 /// # Examples
622 ///
623 /// ```
624 /// # use epub::doc::EpubDoc;
625 /// # let doc = EpubDoc::new("test.epub");
626 /// # let mut doc = doc.unwrap();
627 /// assert!(!doc.go_prev());
628 ///
629 /// doc.go_next(); // 000.xhtml
630 /// doc.go_next(); // 001.xhtml
631 /// doc.go_next(); // 002.xhtml
632 /// doc.go_prev(); // 001.xhtml
633 /// assert_eq!("001.xhtml", doc.get_current_id().unwrap());
634 /// ```
635 ///
636 /// Returns [`false`] if the current chapter is the first one
637 pub fn go_prev(&mut self) -> bool {
638 if self.current < 1 {
639 false
640 } else {
641 self.current -= 1;
642 true
643 }
644 }
645
646 /// Returns the number of chapters
647 ///
648 /// # Examples
649 ///
650 /// ```
651 /// # use epub::doc::EpubDoc;
652 /// # let doc = EpubDoc::new("test.epub");
653 /// # let mut doc = doc.unwrap();
654 /// assert_eq!(17, doc.get_num_chapters());
655 /// ```
656 pub fn get_num_chapters(&self) -> usize {
657 self.spine.len()
658 }
659
660 #[deprecated(note="please use `get_num_chapters` instead")]
661 pub fn get_num_pages(&self) -> usize {
662 self.get_num_chapters()
663 }
664
665 /// Returns the current chapter number, starting from 0
666 pub fn get_current_chapter(&self) -> usize {
667 self.current
668 }
669
670 #[deprecated(note="please use `get_current_chapter` instead")]
671 pub fn get_current_page(&self) -> usize {
672 self.get_current_chapter()
673 }
674
675
676 /// Changes the current chapter
677 ///
678 /// # Examples
679 ///
680 /// ```
681 /// # use epub::doc::EpubDoc;
682 /// # let doc = EpubDoc::new("test.epub");
683 /// # let mut doc = doc.unwrap();
684 /// assert_eq!(0, doc.get_current_chapter());
685 /// doc.set_current_chapter(2);
686 /// assert_eq!("001.xhtml", doc.get_current_id().unwrap());
687 /// assert_eq!(2, doc.get_current_chapter());
688 /// assert!(!doc.set_current_chapter(50));
689 /// ```
690 ///
691 /// Returns [`false`] if the chapter is out of bounds
692 pub fn set_current_chapter(&mut self, n: usize) -> bool {
693 if n >= self.spine.len() {
694 false
695 } else {
696 self.current = n;
697 true
698 }
699 }
700
701 #[deprecated(note="please use `set_current_chapter` instead")]
702 pub fn set_current_page(&mut self, n: usize) -> bool {
703 self.set_current_chapter(n)
704 }
705
706
707 /// This will inject arbitrary css into every queried html page
708 /// [`Self::get_current_with_epub_uris`]
709 ///
710 /// # Examples
711 ///
712 /// ```
713 /// # use epub::doc::EpubDoc;
714 /// # let doc = EpubDoc::new("test.epub");
715 /// # let mut doc = doc.unwrap();
716 /// # let _ = doc.set_current_chapter(2);
717 /// let extracss = "body { background-color: black; color: white }";
718 /// doc.add_extra_css(extracss);
719 /// let current = doc.get_current_with_epub_uris().unwrap();
720 /// let text = String::from_utf8(current).unwrap();
721 /// assert!(text.contains(extracss));
722 /// ```
723 pub fn add_extra_css(&mut self, css: &str) {
724 self.extra_css.push(String::from(css));
725 }
726
727 /// Function to convert a resource path to a chapter number in the spine
728 /// If the resource isn't in the spine list, None will be returned
729 ///
730 /// This method is useful to convert a toc [`NavPoint`] content to a chapter number
731 /// to be able to navigate easily
732 pub fn resource_uri_to_chapter(&self, uri: &PathBuf) -> Option<usize> {
733 for (k, ResourceItem { path, .. }) in &self.resources {
734 if path == uri {
735 return self.resource_id_to_chapter(k);
736 }
737 }
738
739 None
740 }
741
742 /// Function to convert a resource id to a chapter number in the spine
743 /// If the resourse isn't in the spine list, None will be returned
744 pub fn resource_id_to_chapter(&self, uri: &str) -> Option<usize> {
745 self.spine.iter().position(|item| item.idref == uri)
746 }
747
748 fn fill_resources(&mut self) -> Result<(), DocError> {
749 let container = self.archive.get_entry(&self.root_file)?;
750 let root = xmlutils::XMLReader::parse(container.as_slice())?;
751 self.version = match root.borrow().get_attr("version") {
752 Some(v) if v == "2.0" => EpubVersion::Version2_0,
753 Some(v) if v == "3.0" => EpubVersion::Version3_0,
754 Some(v) => EpubVersion::Unknown(String::from(v)),
755 _ => EpubVersion::Unknown(String::from("Unknown")),
756 };
757 let unique_identifier_id = &root.borrow().get_attr("unique-identifier");
758
759 // resources from manifest
760 // This should be run before everything else, because other functions relies on
761 // self.resources and should be filled before calling `fill_toc`
762 let manifest = root
763 .borrow()
764 .find("manifest")
765 .ok_or(DocError::InvalidEpub)?;
766 for r in &manifest.borrow().children {
767 let item = r.borrow();
768 let _ = self.insert_resource(&item);
769 }
770
771 // items from spine
772 let spine = root.borrow().find("spine").ok_or(DocError::InvalidEpub)?;
773 for r in &spine.borrow().children {
774 let item = r.borrow();
775 let _ = self.insert_spine(&item);
776 }
777
778 // toc.ncx
779 if let Some(toc) = spine.borrow().get_attr("toc") {
780 let _ = self.fill_toc(&toc);
781 }
782
783 // metadata
784 let metadata_elem = root
785 .borrow()
786 .find("metadata")
787 .ok_or(DocError::InvalidEpub)?;
788 self.fill_metadata(&metadata_elem.borrow());
789
790 let identifier = if let Some(uid) = unique_identifier_id {
791 // find identifier with id
792 self.metadata
793 .iter()
794 .find(|d| d.property == "identifier" && d.id.as_ref().is_some_and(|id| id == uid))
795 } else {
796 // fallback with the first identifier.
797 self.metadata.iter().find(|d| d.property == "identifier")
798 };
799 self.unique_identifier = identifier.map(|data| data.value.clone());
800
801 Ok(())
802 }
803
804 fn fill_metadata(&mut self, elem: &xmlutils::XMLNode) {
805 // refinements are inserted here with ID as key, these are later associated to metadata
806 let mut refinements: HashMap<String, Vec<MetadataRefinement>> = HashMap::new();
807 for r in &elem.children {
808 let item = r.borrow();
809 // for each acceptable element, either push a metadata item or push a refinement
810 match (item.name.namespace_ref(), &item.name.local_name) {
811 // dcterms
812 (Some("http://purl.org/dc/elements/1.1/"), name) => {
813 let id = item.get_attr("id");
814 let lang = item.get_attr("lang");
815 let property = name.clone();
816 let value = item.text.clone().unwrap_or_default();
817
818 let refined: Vec<MetadataRefinement> =
819 if let EpubVersion::Version3_0 = self.version {
820 vec![]
821 } else {
822 // treat it as EPUB2 dcterms, storing additional info in attributes
823 item.attrs
824 .iter()
825 .filter_map(|attr| {
826 if let Some("http://www.idpf.org/2007/opf") =
827 attr.name.namespace_ref()
828 {
829 let property = attr.name.local_name.clone();
830 let value = attr.value.clone();
831 Some(MetadataRefinement {
832 property,
833 value,
834 lang: None,
835 scheme: None,
836 })
837 } else {
838 None
839 }
840 })
841 .collect()
842 };
843 self.metadata.push(MetadataItem {
844 id,
845 property,
846 value,
847 lang,
848 refined,
849 });
850 }
851
852 // <meta>
853 (Some("http://www.idpf.org/2007/opf"), name)
854 if name.eq_ignore_ascii_case("meta") =>
855 {
856 if let Some(property) = item.get_attr("property") {
857 // EPUB3 <meta>, value in its text content
858 let value = item.text.clone().unwrap_or_default();
859 let lang = item.get_attr("lang");
860 if let Some(refines) = item.get_attr("refines") {
861 // refinement (subexpression in EPUB3 terminology)
862 let tid = refines.strip_prefix('#').unwrap_or_else(|| &refines);
863 let scheme = item.get_attr("scheme");
864 let refinement = MetadataRefinement {
865 property,
866 value,
867 lang,
868 scheme,
869 };
870 if let Some(refs) = refinements.get_mut(tid) {
871 refs.push(refinement);
872 } else {
873 refinements.insert(tid.to_string(), vec![refinement]);
874 }
875 } else {
876 // primary
877 let id = item.get_attr("id");
878 self.metadata.push(MetadataItem {
879 id,
880 property,
881 value,
882 lang,
883 refined: vec![],
884 });
885 }
886 } else if let (Some(property), Some(value)) =
887 (item.get_attr("name"), item.get_attr("content"))
888 {
889 // Legacy XHTML1.1 <meta>
890 self.metadata.push(MetadataItem {
891 id: None,
892 property,
893 value,
894 lang: None,
895 refined: vec![],
896 });
897 }
898 }
899
900 _ => (),
901 }
902 }
903
904 // associate refinements
905 self.metadata.iter_mut().for_each(|item| {
906 if let Some(id) = &item.id {
907 if let Some(mut refs) = refinements.remove(id) {
908 item.refined.append(&mut refs);
909 }
910 }
911 });
912 }
913
914 // Forcibly converts separators in a filepath to unix separators to
915 // to ensure that ZipArchive's by_name method will retrieve the proper
916 // file. Failing to convert to unix-style on Windows causes the
917 // ZipArchive not to find the file.
918 fn convert_path_seps<P: AsRef<Path>>(&self, href: P) -> PathBuf {
919 let mut path = self.root_base.join(href);
920 if cfg!(windows) {
921 path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
922 }
923 path
924 }
925
926 fn insert_resource(&mut self, item: &xmlutils::XMLNode) -> Result<(), XMLError> {
927 let id = item
928 .get_attr("id")
929 .ok_or_else(|| XMLError::AttrNotFound("id".into()))?;
930 let href = item
931 .get_attr("href")
932 .ok_or_else(|| XMLError::AttrNotFound("href".into()))?;
933 let mime = item
934 .get_attr("media-type")
935 .ok_or_else(|| XMLError::AttrNotFound("media-type".into()))?;
936 let properties = item.get_attr("properties");
937
938 self.resources.insert(
939 id,
940 ResourceItem {
941 path: self.convert_path_seps(href),
942 mime,
943 properties,
944 },
945 );
946 Ok(())
947 }
948
949 fn insert_spine(&mut self, item: &xmlutils::XMLNode) -> Result<(), DocError> {
950 let idref = item
951 .get_attr("idref")
952 .ok_or_else(|| XMLError::AttrNotFound("idref".into()))?;
953 let linear = item.get_attr("linear").unwrap_or("yes".into()) == "yes";
954 let properties = item.get_attr("properties");
955 let id = item.get_attr("id");
956 self.spine.push(SpineItem {
957 idref,
958 id,
959 linear,
960 properties,
961 });
962 Ok(())
963 }
964
965 fn fill_toc(&mut self, id: &str) -> Result<(), DocError> {
966 let toc_res = self.resources.get(id).ok_or(DocError::InvalidEpub)?; // this should be turned into it's own error type, but
967
968 let container = self.archive.get_entry(&toc_res.path)?;
969 let root = xmlutils::XMLReader::parse(container.as_slice())?;
970
971 self.toc_title = root
972 .borrow()
973 .find("docTitle")
974 .and_then(|dt| {
975 dt.borrow()
976 .children
977 .get(0)
978 .and_then(|t| t.borrow().text.clone())
979 })
980 .unwrap_or_default();
981
982 let mapnode = root
983 .borrow()
984 .find("navMap")
985 .ok_or_else(|| XMLError::AttrNotFound("navMap".into()))?;
986
987 self.toc.append(&mut self.get_navpoints(&mapnode.borrow()));
988 self.toc.sort();
989
990 Ok(())
991 }
992
993 /// Recursively extract all navpoints from a node.
994 fn get_navpoints(&self, parent: &xmlutils::XMLNode) -> Vec<NavPoint> {
995 let mut navpoints = Vec::new();
996
997 // TODO: parse metadata (dtb:totalPageCount, dtb:depth, dtb:maxPageNumber)
998
999 for nav in &parent.children {
1000 let item = nav.borrow();
1001 if item.name.local_name != "navPoint" {
1002 continue;
1003 }
1004 let play_order = item
1005 .get_attr("playOrder")
1006 .and_then(|n| n.parse::<usize>().ok());
1007 let content = item
1008 .find("content")
1009 .and_then(|c| c.borrow().get_attr("src").map(|p| self.root_base.join(p)));
1010
1011 let label = item.find("navLabel").and_then(|l| {
1012 l.borrow()
1013 .children
1014 .get(0)
1015 .and_then(|t| t.borrow().text.clone())
1016 });
1017
1018 if let (order, Some(content_path), Some(label_text)) = (play_order, content, label) {
1019 let navpoint = NavPoint {
1020 label: label_text.clone(),
1021 content: content_path.clone(),
1022 children: self.get_navpoints(&item),
1023 play_order: order,
1024 };
1025 navpoints.push(navpoint);
1026 }
1027 }
1028
1029 navpoints.sort();
1030 navpoints
1031 }
1032}
1033
1034fn get_root_file(container: &[u8]) -> Result<PathBuf, DocError> {
1035 let root = xmlutils::XMLReader::parse(container)?;
1036 let el = root.borrow();
1037 let element = el
1038 .find("rootfile")
1039 .ok_or_else(|| XMLError::AttrNotFound("rootfile".into()))?;
1040 let el2 = element.borrow();
1041
1042 let attr = el2
1043 .get_attr("full-path")
1044 .ok_or_else(|| XMLError::AttrNotFound("full-path".into()))?;
1045
1046 Ok(PathBuf::from(attr))
1047}
1048
/// Builds the `epub://` uri for the reference `append` found in the document
/// stored at `path`.
///
/// `append` is resolved against the directory of `path`: `..` components pop
/// a directory, normal components are appended, everything else is ignored.
/// The result is prefixed with `epub://`, using `/` as separator on every
/// platform.
///
/// References that carry their own URI scheme (`http:`, `https:`, `mailto:`,
/// `data:`, ...) are external and returned unchanged. Detection looks for a
/// real scheme instead of an `http` text prefix, so a relative file such as
/// `http_notes.xhtml` is still resolved inside the epub.
fn build_epub_uri<P: AsRef<Path>>(path: P, append: &str) -> String {
    // Allow external links.
    if has_uri_scheme(append) {
        return String::from(append);
    }

    // Start from the base directory of the current file.
    let mut resolved = path.as_ref().to_path_buf();
    resolved.pop();

    for component in Path::new(append).components() {
        match component {
            Component::ParentDir => {
                resolved.pop();
            }
            Component::Normal(segment) => resolved.push(segment),
            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
        }
    }

    let resolved = resolved.to_string_lossy();
    if cfg!(windows) {
        // On Windows, replace all Windows path separators with Unix ones.
        format!("epub://{}", resolved.replace('\\', "/"))
    } else {
        format!("epub://{}", resolved)
    }
}

/// Tells whether `reference` starts with a URI scheme, i.e. a letter followed
/// by letters, digits, `+`, `-` or `.` and terminated by `:`.
fn has_uri_scheme(reference: &str) -> bool {
    let Some((scheme, _)) = reference.split_once(':') else {
        return false;
    };
    let mut chars = scheme.chars();
    chars.next().is_some_and(|c| c.is_ascii_alphabetic())
        && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
}