1use std::{
24 collections::HashMap,
25 fs::{self, File},
26 io::{BufReader, Read, Seek},
27 path::{Path, PathBuf},
28 sync::{
29 Arc, Mutex, OnceLock,
30 atomic::{AtomicUsize, Ordering},
31 },
32};
33
34#[cfg(not(feature = "no-indexmap"))]
35use indexmap::IndexMap;
36use zip::{ZipArchive, result::ZipError};
37
38use crate::{
39 error::EpubError,
40 types::{
41 EncryptionData, EpubVersion, ManifestItem, MetadataItem, MetadataLinkItem,
42 MetadataRefinement, MetadataSheet, NavPoint, SpineItem,
43 },
44 utils::{
45 DecodeBytes, NormalizeWhitespace, XmlElement, XmlReader, adobe_font_dencryption,
46 check_realtive_link_leakage, compression_method_check, get_file_in_zip_archive,
47 idpf_font_dencryption,
48 },
49};
50
/// An opened EPUB container together with the data parsed from its package document.
///
/// Instances are produced by `EpubDoc::from_reader` (any `Read + Seek + Send` source) or
/// `EpubDoc::new` (a file on disk).
pub struct EpubDoc<R: Read + Seek + Send> {
    /// Mutex-guarded handle to the underlying ZIP archive.
    pub(crate) archive: Arc<Mutex<ZipArchive<R>>>,

    /// Canonicalized form of the path handed to `from_reader`.
    pub(crate) epub_path: PathBuf,

    /// Location of the package (OPF) document inside the archive, taken from the
    /// `full-path` attribute of the first `rootfile` in `META-INF/container.xml`.
    pub package_path: PathBuf,

    /// Parent directory of `package_path`; plain relative manifest hrefs are joined onto it.
    pub base_path: PathBuf,

    /// EPUB version detected from the package document.
    pub version: EpubVersion,

    /// Value of the `identifier` metadata item selected by the package's
    /// `unique-identifier` attribute, or of the first `identifier` item when the
    /// attribute is absent.
    pub unique_identifier: String,

    /// Metadata items in document order, with refinements already attached.
    pub metadata: Vec<MetadataItem>,

    /// `link` elements found in the metadata section.
    pub metadata_link: Vec<MetadataLinkItem>,

    /// Manifest items keyed by their `id` attribute.
    #[cfg(not(feature = "no-indexmap"))]
    pub manifest: IndexMap<String, ManifestItem>,
    /// Manifest items keyed by their `id` attribute.
    #[cfg(feature = "no-indexmap")]
    pub manifest: HashMap<String, ManifestItem>,

    /// Spine entries in document order.
    pub spine: Vec<SpineItem>,

    /// Entries read from `META-INF/encryption.xml`; `None` when the file is absent or
    /// lists no `EncryptedData` element.
    pub encryption: Option<Vec<EncryptionData>>,

    /// Table of contents parsed from the NCX (EPUB 2) or the navigation document (EPUB 3).
    pub catalog: Vec<NavPoint>,

    /// Title of the table of contents; stays empty when the source declares none.
    pub catalog_title: String,

    /// Spine position used by `spine_prev`, `spine_next` and `spine_current`.
    current_spine_index: AtomicUsize,

    /// Whether `META-INF/encryption.xml` could be opened in the archive.
    has_encryption: bool,

    /// Cache filled on the first call to `get_metadata_sheet`.
    metadata_sheet: OnceLock<MetadataSheet>,
}
152
153impl<R: Read + Seek + Send> EpubDoc<R> {
154 pub fn from_reader(reader: R, epub_path: PathBuf) -> Result<Self, EpubError> {
174 let mut archive = ZipArchive::new(reader).map_err(EpubError::from)?;
184 let epub_path = fs::canonicalize(epub_path)?;
185
186 compression_method_check(&mut archive)?;
187
188 let container =
189 get_file_in_zip_archive(&mut archive, "META-INF/container.xml")?.decode()?;
190 let package_path = Self::parse_container(container)?;
191 let base_path = package_path
192 .parent()
193 .expect("the parent directory of the opf file must exist")
194 .to_path_buf();
195
196 let opf_file = get_file_in_zip_archive(
197 &mut archive,
198 package_path
199 .to_str()
200 .expect("package_path should be valid UTF-8"),
201 )?
202 .decode()?;
203 let package = XmlReader::parse(&opf_file)?;
204
205 let version = Self::determine_epub_version(&package)?;
206 let has_encryption = archive
207 .by_path(Path::new("META-INF/encryption.xml"))
208 .is_ok();
209
210 let mut doc = Self {
211 archive: Arc::new(Mutex::new(archive)),
212 epub_path,
213 package_path,
214 base_path,
215 version,
216 unique_identifier: String::new(),
217 metadata: vec![],
218 metadata_link: vec![],
219
220 #[cfg(feature = "no-indexmap")]
221 manifest: HashMap::new(),
222 #[cfg(not(feature = "no-indexmap"))]
223 manifest: IndexMap::new(),
224
225 spine: vec![],
226 encryption: None,
227 catalog: vec![],
228 catalog_title: String::new(),
229 current_spine_index: AtomicUsize::new(0),
230 has_encryption,
231 metadata_sheet: OnceLock::new(),
232 };
233
234 let metadata_element = package.find_elements_by_name("metadata").next().unwrap();
235 let manifest_element = package.find_elements_by_name("manifest").next().unwrap();
236 let spine_element = package.find_elements_by_name("spine").next().unwrap();
237
238 doc.parse_metadata(metadata_element)?;
239 doc.parse_manifest(manifest_element)?;
240 doc.parse_spine(spine_element)?;
241 doc.parse_encryption()?;
242 doc.parse_catalog()?;
243
244 doc.unique_identifier = if let Some(uid) = package.get_attr("unique-identifier") {
246 doc.metadata.iter().find(|item| {
247 item.property == "identifier" && item.id.as_ref().is_some_and(|id| id == &uid)
248 })
249 } else {
250 doc.metadata
251 .iter()
252 .find(|item| item.property == "identifier")
253 }
254 .map(|item| item.value.clone())
255 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "dc:identifier".to_string() })?;
256
257 Ok(doc)
258 }
259
260 fn parse_container(data: String) -> Result<PathBuf, EpubError> {
276 let root = XmlReader::parse(&data)?;
277 let rootfile = root
278 .find_elements_by_name("rootfile")
279 .next()
280 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "rootfile".to_string() })?;
281
282 let attr =
283 rootfile
284 .get_attr("full-path")
285 .ok_or_else(|| EpubError::MissingRequiredAttribute {
286 tag: "rootfile".to_string(),
287 attribute: "full-path".to_string(),
288 })?;
289
290 Ok(PathBuf::from(attr))
291 }
292
293 fn parse_metadata(&mut self, metadata_element: &XmlElement) -> Result<(), EpubError> {
304 const DC_NAMESPACE: &str = "http://purl.org/dc/elements/1.1/";
305 const OPF_NAMESPACE: &str = "http://www.idpf.org/2007/opf";
306
307 let mut metadata = Vec::new();
308 let mut metadata_link = Vec::new();
309 let mut refinements = HashMap::<String, Vec<MetadataRefinement>>::new();
310
311 for element in metadata_element.children() {
312 match &element.namespace {
313 Some(namespace) if namespace == DC_NAMESPACE => {
314 self.parse_dc_metadata(element, &mut metadata)?
315 }
316
317 Some(namespace) if namespace == OPF_NAMESPACE => self.parse_opf_metadata(
318 element,
319 &mut metadata,
320 &mut metadata_link,
321 &mut refinements,
322 )?,
323
324 _ => {}
325 };
326 }
327
328 for item in metadata.iter_mut() {
329 if let Some(id) = &item.id {
330 if let Some(refinements) = refinements.remove(id) {
331 item.refined = refinements;
332 }
333 }
334 }
335
336 self.metadata = metadata;
337 self.metadata_link = metadata_link;
338 Ok(())
339 }
340
341 fn parse_manifest(&mut self, manifest_element: &XmlElement) -> Result<(), EpubError> {
351 let estimated_items = manifest_element.children().count();
352 #[cfg(feature = "no-indexmap")]
353 let mut resources = HashMap::with_capacity(estimated_items);
354 #[cfg(not(feature = "no-indexmap"))]
355 let mut resources = IndexMap::with_capacity(estimated_items);
356
357 for element in manifest_element.children() {
358 let id = element
359 .get_attr("id")
360 .ok_or_else(|| EpubError::MissingRequiredAttribute {
361 tag: element.tag_name(),
362 attribute: "id".to_string(),
363 })?
364 .to_string();
365 let path = element
366 .get_attr("href")
367 .ok_or_else(|| EpubError::MissingRequiredAttribute {
368 tag: element.tag_name(),
369 attribute: "href".to_string(),
370 })?
371 .to_string();
372 let mime = element
373 .get_attr("media-type")
374 .ok_or_else(|| EpubError::MissingRequiredAttribute {
375 tag: element.tag_name(),
376 attribute: "media-type".to_string(),
377 })?
378 .to_string();
379 let properties = element.get_attr("properties");
380 let fallback = element.get_attr("fallback");
381
382 resources.insert(
383 id.clone(),
384 ManifestItem {
385 id,
386 path: self.normalize_manifest_path(&path)?,
387 mime,
388 properties,
389 fallback,
390 },
391 );
392 }
393
394 self.manifest = resources;
395 self.validate_fallback_chains();
396 Ok(())
397 }
398
399 fn parse_spine(&mut self, spine_element: &XmlElement) -> Result<(), EpubError> {
409 let mut spine = Vec::new();
410 for element in spine_element.children() {
411 let idref = element
412 .get_attr("idref")
413 .ok_or_else(|| EpubError::MissingRequiredAttribute {
414 tag: element.tag_name(),
415 attribute: "idref".to_string(),
416 })?
417 .to_string();
418 let id = element.get_attr("id");
419 let linear = element
420 .get_attr("linear")
421 .map(|linear| linear == "yes")
422 .unwrap_or(true);
423 let properties = element.get_attr("properties");
424
425 spine.push(SpineItem { idref, id, linear, properties });
426 }
427
428 self.spine = spine;
429 Ok(())
430 }
431
432 fn parse_encryption(&mut self) -> Result<(), EpubError> {
442 if !self.has_encryption() {
443 return Ok(());
444 }
445
446 let mut archive = self.archive.lock()?;
447 let encryption_file =
448 get_file_in_zip_archive(&mut archive, "META-INF/encryption.xml")?.decode()?;
449
450 let root = XmlReader::parse(&encryption_file)?;
451
452 let mut encryption_data = Vec::new();
453 for data in root.children() {
454 if data.name != "EncryptedData" {
455 continue;
456 }
457
458 let method = data
459 .find_elements_by_name("EncryptionMethod")
460 .next()
461 .ok_or_else(|| EpubError::NonCanonicalFile {
462 tag: "EncryptionMethod".to_string(),
463 })?;
464 let reference = data
465 .find_elements_by_name("CipherReference")
466 .next()
467 .ok_or_else(|| EpubError::NonCanonicalFile {
468 tag: "CipherReference".to_string(),
469 })?;
470
471 encryption_data.push(EncryptionData {
472 method: method
473 .get_attr("Algorithm")
474 .ok_or_else(|| EpubError::MissingRequiredAttribute {
475 tag: "EncryptionMethod".to_string(),
476 attribute: "Algorithm".to_string(),
477 })?
478 .to_string(),
479 data: reference
480 .get_attr("URI")
481 .ok_or_else(|| EpubError::MissingRequiredAttribute {
482 tag: "CipherReference".to_string(),
483 attribute: "URI".to_string(),
484 })?
485 .to_string(),
486 });
487 }
488
489 if !encryption_data.is_empty() {
490 self.encryption = Some(encryption_data);
491 }
492
493 Ok(())
494 }
495
496 fn parse_catalog(&mut self) -> Result<(), EpubError> {
503 const HEAD_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];
504
505 let mut archive = self.archive.lock()?;
506 match self.version {
507 EpubVersion::Version2_0 => {
508 let opf_file =
509 get_file_in_zip_archive(&mut archive, self.package_path.to_str().unwrap())?
510 .decode()?;
511 let opf_element = XmlReader::parse(&opf_file)?;
512
513 let toc_id = opf_element
514 .find_children_by_name("spine")
515 .next()
516 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?
517 .get_attr("toc")
518 .ok_or_else(|| EpubError::MissingRequiredAttribute {
519 tag: "spine".to_string(),
520 attribute: "toc".to_string(),
521 })?
522 .to_owned();
523 let toc_path = self
524 .manifest
525 .get(&toc_id)
526 .ok_or(EpubError::ResourceIdNotExist { id: toc_id })?
527 .path
528 .to_str()
529 .unwrap();
530
531 let ncx_file = get_file_in_zip_archive(&mut archive, toc_path)?.decode()?;
532 let ncx = XmlReader::parse(&ncx_file)?;
533
534 match ncx.find_elements_by_name("docTitle").next() {
535 Some(element) => self.catalog_title = element.text(),
536 None => log::warn!(
537 "Expecting to get docTitle information from the ncx file, but it's missing."
538 ),
539 };
540
541 let nav_map = ncx
542 .find_elements_by_name("navMap")
543 .next()
544 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "navMap".to_string() })?;
545
546 self.catalog = self.parse_nav_points(nav_map)?;
547
548 Ok(())
549 }
550
551 EpubVersion::Version3_0 => {
552 let nav_path = self
553 .manifest
554 .values()
555 .find(|item| {
556 if let Some(property) = &item.properties {
557 return property.contains("nav");
558 }
559 false
560 })
561 .map(|item| item.path.clone())
562 .ok_or_else(|| EpubError::NonCanonicalEpub {
563 expected_file: "Navigation Document".to_string(),
564 })?;
565
566 let nav_file =
567 get_file_in_zip_archive(&mut archive, nav_path.to_str().unwrap())?.decode()?;
568
569 let nav_element = XmlReader::parse(&nav_file)?;
570 let nav = nav_element
571 .find_elements_by_name("nav")
572 .find(|&element| element.get_attr("epub:type") == Some(String::from("toc")))
573 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "nav".to_string() })?;
574 let nav_title = nav.find_children_by_names(&HEAD_TAGS).next();
575 let nav_list = nav
576 .find_children_by_name("ol")
577 .next()
578 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "ol".to_string() })?;
579
580 self.catalog = self.parse_catalog_list(nav_list)?;
581 if let Some(nav_title) = nav_title {
582 self.catalog_title = nav_title.text();
583 };
584 Ok(())
585 }
586 }
587 }
588
    /// Returns the flag recorded while opening the archive: `true` when
    /// `META-INF/encryption.xml` could be opened, `false` otherwise.
    #[inline]
    pub fn has_encryption(&self) -> bool {
        self.has_encryption
    }
608
609 pub fn get_metadata(&self, key: &str) -> Option<Vec<MetadataItem>> {
623 let metadatas = self
624 .metadata
625 .iter()
626 .filter(|item| item.property == key)
627 .cloned()
628 .collect::<Vec<MetadataItem>>();
629
630 (!metadatas.is_empty()).then_some(metadatas)
631 }
632
633 pub fn get_metadata_value(&self, key: &str) -> Option<Vec<String>> {
645 let values = self
646 .metadata
647 .iter()
648 .filter(|item| item.property == key)
649 .map(|item| item.value.clone())
650 .collect::<Vec<String>>();
651
652 (!values.is_empty()).then_some(values)
653 }
654
655 #[inline]
668 pub fn get_title(&self) -> Vec<String> {
669 self.get_metadata_value("title")
670 .expect("missing required 'title' metadata which is required by the EPUB specification")
671 }
672
673 #[inline]
687 pub fn get_language(&self) -> Vec<String> {
688 self.get_metadata_value("language").expect(
689 "missing required 'language' metadata which is required by the EPUB specification",
690 )
691 }
692
693 #[inline]
709 pub fn get_identifier(&self) -> Vec<String> {
710 self.get_metadata_value("identifier").expect(
711 "missing required 'identifier' metadata which is required by the EPUB specification",
712 )
713 }
714
715 pub fn get_metadata_sheet(&self) -> &MetadataSheet {
729 self.metadata_sheet.get_or_init(|| {
730 let mut sheet = MetadataSheet::new();
731 for item in &self.metadata {
732 let value = item.value.clone();
733
734 match item.property.as_str() {
735 "title" => {
736 sheet.title.push(value);
737 }
738 "creator" => {
739 sheet.creator.push(value);
740 }
741 "contributor" => {
742 sheet.contributor.push(value);
743 }
744 "subject" => {
745 sheet.subject.push(value);
746 }
747 "language" => {
748 sheet.language.push(value);
749 }
750 "relation" => {
751 sheet.relation.push(value);
752 }
753 "date" => {
754 let event = item
755 .refined
756 .iter()
757 .filter_map(|refine| {
758 if refine.property.eq("event") {
759 Some(refine.value.clone())
760 } else {
761 None
762 }
763 })
764 .next()
765 .unwrap_or_default();
766 sheet.date.insert(value, event);
767 }
768 "identifier" => {
769 let id = item.id.clone().unwrap_or_default();
770 sheet.identifier.insert(id, value);
771 }
772 "description" => {
773 sheet.description = value;
774 }
775 "format" => {
776 sheet.format = value;
777 }
778 "publisher" => {
779 sheet.publisher = value;
780 }
781 "rights" => {
782 sheet.rights = value;
783 }
784 "source" => {
785 sheet.source = value;
786 }
787 "ccoverage" => {
788 sheet.coverage = value;
789 }
790 "type" => {
791 sheet.epub_type = value;
792 }
793 _ => {}
794 };
795 }
796 sheet
797 })
798 }
799
800 pub fn get_manifest_item(&self, id: &str) -> Result<(Vec<u8>, String), EpubError> {
817 let resource_item = self
818 .manifest
819 .get(id)
820 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
821
822 self.get_resource(resource_item)
823 }
824
825 pub fn get_manifest_item_by_path(&self, path: &str) -> Result<(Vec<u8>, String), EpubError> {
844 let manifest = self
845 .manifest
846 .iter()
847 .find(|(_, item)| item.path.to_str().unwrap() == path)
848 .map(|(_, manifest)| manifest)
849 .ok_or_else(|| EpubError::ResourceNotFound { resource: path.to_string() })?;
850
851 self.get_resource(manifest)
852 }
853
854 pub fn get_manifest_item_with_fallback(
870 &self,
871 id: &str,
872 supported_format: &[&str],
873 ) -> Result<(Vec<u8>, String), EpubError> {
874 let mut current_id = id;
875 let mut fallback_chain = Vec::<&str>::new();
876 'fallback: loop {
877 let manifest_item = self
878 .manifest
879 .get(current_id)
880 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
881
882 if supported_format.contains(&manifest_item.mime.as_str()) {
883 return self.get_resource(manifest_item);
884 }
885
886 let fallback_id = match &manifest_item.fallback {
887 None => break 'fallback,
889
890 Some(id) if fallback_chain.contains(&id.as_str()) => break 'fallback,
892
893 Some(id) => {
894 fallback_chain.push(id.as_str());
895
896 id.as_str()
900 }
901 };
902
903 current_id = fallback_id;
904 }
905
906 Err(EpubError::NoSupportedFileFormat)
907 }
908
909 pub fn get_cover(&self) -> Option<(Vec<u8>, String)> {
926 self.manifest
927 .values()
928 .filter(|manifest| {
929 manifest.id.to_ascii_lowercase().contains("cover")
930 || manifest
931 .properties
932 .as_ref()
933 .map(|properties| properties.to_ascii_lowercase().contains("cover"))
934 .unwrap_or(false)
935 })
936 .find_map(|manifest| {
937 self.get_resource(manifest)
938 .map_err(|err| log::warn!("{err}"))
939 .ok()
940 })
941 }
942
943 fn get_resource(&self, resource_item: &ManifestItem) -> Result<(Vec<u8>, String), EpubError> {
945 let path = resource_item
946 .path
947 .to_str()
948 .expect("manifest item path should be valid UTF-8");
949
950 let mut archive = self.archive.lock()?;
951 let mut data = match archive.by_name(path) {
952 Ok(mut file) => {
953 let mut entry = Vec::<u8>::new();
954 file.read_to_end(&mut entry)?;
955 Ok(entry)
956 }
957 Err(ZipError::FileNotFound) => {
958 Err(EpubError::ResourceNotFound { resource: path.to_string() })
959 }
960 Err(err) => Err(EpubError::from(err)),
961 }?;
962
963 if let Some(method) = self.is_encryption_file(path) {
964 data = self.auto_dencrypt(&method, &mut data)?;
965 }
966
967 Ok((data, resource_item.mime.clone()))
968 }
969
970 pub fn navigate_by_spine_index(&self, index: usize) -> Option<(Vec<u8>, String)> {
989 if index >= self.spine.len() {
990 return None;
991 }
992
993 let manifest_id = self.spine[index].idref.as_ref();
994 self.current_spine_index.store(index, Ordering::SeqCst);
995 self.get_manifest_item(manifest_id)
996 .map_err(|err| log::warn!("{err}"))
997 .ok()
998 }
999
1000 pub fn spine_prev(&self) -> Option<(Vec<u8>, String)> {
1012 let current_index = self.current_spine_index.load(Ordering::SeqCst);
1013 if current_index == 0 || !self.spine[current_index].linear {
1014 return None;
1015 }
1016
1017 let prev_index = (0..current_index)
1018 .rev()
1019 .find(|&index| self.spine[index].linear)?;
1020
1021 self.current_spine_index.store(prev_index, Ordering::SeqCst);
1022 let manifest_id = self.spine[prev_index].idref.as_ref();
1023 self.get_manifest_item(manifest_id)
1024 .map_err(|err| log::warn!("{err}"))
1025 .ok()
1026 }
1027
1028 pub fn spine_next(&self) -> Option<(Vec<u8>, String)> {
1040 let current_index = self.current_spine_index.load(Ordering::SeqCst);
1041 if current_index >= self.spine.len() - 1 || !self.spine[current_index].linear {
1042 return None;
1043 }
1044
1045 let next_index =
1046 (current_index + 1..self.spine.len()).find(|&index| self.spine[index].linear)?;
1047
1048 self.current_spine_index.store(next_index, Ordering::SeqCst);
1049 let manifest_id = self.spine[next_index].idref.as_ref();
1050 self.get_manifest_item(manifest_id)
1051 .map_err(|err| log::warn!("{err}"))
1052 .ok()
1053 }
1054
1055 pub fn spine_current(&self) -> Option<(Vec<u8>, String)> {
1065 let manifest_id = self.spine[self.current_spine_index.load(Ordering::SeqCst)]
1066 .idref
1067 .as_ref();
1068 self.get_manifest_item(manifest_id)
1069 .map_err(|err| log::warn!("{err}"))
1070 .ok()
1071 }
1072
1073 fn determine_epub_version(opf_element: &XmlElement) -> Result<EpubVersion, EpubError> {
1083 if let Some(version) = opf_element.get_attr("version") {
1085 match version.as_str() {
1086 "2.0" => return Ok(EpubVersion::Version2_0),
1087 "3.0" => return Ok(EpubVersion::Version3_0),
1088 _ => {}
1089 }
1090 }
1091
1092 let spine_element = opf_element
1093 .find_elements_by_name("spine")
1094 .next()
1095 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?;
1096
1097 if spine_element.get_attr("toc").is_some() {
1099 return Ok(EpubVersion::Version2_0);
1100 }
1101
1102 let manifest_element = opf_element
1103 .find_elements_by_name("manifest")
1104 .next()
1105 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "manifest".to_string() })?;
1106
1107 manifest_element
1109 .children()
1110 .find_map(|element| {
1111 if let Some(id) = element.get_attr("id") {
1112 if id.eq("nav") {
1113 return Some(EpubVersion::Version3_0);
1114 }
1115 }
1116
1117 None
1118 })
1119 .ok_or(EpubError::UnrecognizedEpubVersion)
1120 }
1121
1122 #[inline]
1132 fn parse_dc_metadata(
1133 &self,
1134 element: &XmlElement,
1135 metadata: &mut Vec<MetadataItem>,
1136 ) -> Result<(), EpubError> {
1138 let id = element.get_attr("id");
1139 let lang = element.get_attr("lang");
1140 let property = element.name.clone();
1141 let value = element.text().normalize_whitespace();
1142
1143 let refined = match self.version {
1144 EpubVersion::Version2_0 => element
1147 .attributes
1148 .iter()
1149 .map(|(name, value)| {
1150 let property = name.to_string();
1151 let value = value.to_string().normalize_whitespace();
1152
1153 MetadataRefinement {
1154 refines: id.clone().unwrap(),
1155 property,
1156 value,
1157 lang: None,
1158 scheme: None,
1159 }
1160 })
1161 .collect(),
1162 EpubVersion::Version3_0 => vec![],
1163 };
1164
1165 metadata.push(MetadataItem { id, property, value, lang, refined });
1166
1167 Ok(())
1168 }
1169
1170 #[inline]
1181 fn parse_opf_metadata(
1182 &self,
1183 element: &XmlElement,
1184 metadata: &mut Vec<MetadataItem>,
1185 metadata_link: &mut Vec<MetadataLinkItem>,
1186 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1187 ) -> Result<(), EpubError> {
1188 match element.name.as_str() {
1189 "meta" => self.parse_meta_element(element, metadata, refinements),
1190 "link" => self.parse_link_element(element, metadata_link),
1191 _ => Ok(()),
1192 }
1193 }
1194
1195 #[inline]
1196 fn parse_meta_element(
1197 &self,
1198 element: &XmlElement,
1199 metadata: &mut Vec<MetadataItem>,
1200 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1201 ) -> Result<(), EpubError> {
1202 match self.version {
1203 EpubVersion::Version2_0 => {
1204 let property = element
1205 .get_attr("name")
1206 .ok_or_else(|| EpubError::NonCanonicalFile { tag: element.tag_name() })?;
1207 let value = element
1208 .get_attr("content")
1209 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1210 tag: element.tag_name(),
1211 attribute: "content".to_string(),
1212 })?
1213 .normalize_whitespace();
1214
1215 metadata.push(MetadataItem {
1216 id: None,
1217 property,
1218 value,
1219 lang: None,
1220 refined: vec![],
1221 });
1222 }
1223
1224 EpubVersion::Version3_0 => {
1225 let property = element.get_attr("property").ok_or_else(|| {
1226 EpubError::MissingRequiredAttribute {
1227 tag: element.tag_name(),
1228 attribute: "property".to_string(),
1229 }
1230 })?;
1231 let value = element.text().normalize_whitespace();
1232 let lang = element.get_attr("lang");
1233
1234 if let Some(refines) = element.get_attr("refines") {
1235 let id = refines.strip_prefix("#").unwrap_or(&refines).to_string();
1236 let scheme = element.get_attr("scheme");
1237 let refinement = MetadataRefinement {
1238 refines: id.clone(),
1239 property,
1240 value,
1241 lang,
1242 scheme,
1243 };
1244
1245 if let Some(refinements) = refinements.get_mut(&id) {
1246 refinements.push(refinement);
1247 } else {
1248 refinements.insert(id, vec![refinement]);
1249 }
1250 } else {
1251 let id = element.get_attr("id");
1252 let item = MetadataItem {
1253 id,
1254 property,
1255 value,
1256 lang,
1257 refined: vec![],
1258 };
1259
1260 metadata.push(item);
1261 };
1262 }
1263 }
1264 Ok(())
1265 }
1266
1267 #[inline]
1268 fn parse_link_element(
1269 &self,
1270 element: &XmlElement,
1271 metadata_link: &mut Vec<MetadataLinkItem>,
1272 ) -> Result<(), EpubError> {
1273 let href = element
1274 .get_attr("href")
1275 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1276 tag: element.tag_name(),
1277 attribute: "href".to_string(),
1278 })?;
1279 let rel = element
1280 .get_attr("rel")
1281 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1282 tag: element.tag_name(),
1283 attribute: "rel".to_string(),
1284 })?;
1285 let hreflang = element.get_attr("hreflang");
1286 let id = element.get_attr("id");
1287 let mime = element.get_attr("media-type");
1288 let properties = element.get_attr("properties");
1289
1290 metadata_link.push(MetadataLinkItem {
1291 href,
1292 rel,
1293 hreflang,
1294 id,
1295 mime,
1296 properties,
1297 refines: None,
1298 });
1299 Ok(())
1300 }
1301
1302 fn parse_nav_points(&self, parent_element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1308 let mut nav_points = Vec::new();
1309 for nav_point in parent_element.find_children_by_name("navPoint") {
1310 let label = match nav_point.find_children_by_name("navLabel").next() {
1311 Some(element) => element.text(),
1312 None => String::new(),
1313 };
1314
1315 let content = nav_point
1316 .find_children_by_name("content")
1317 .next()
1318 .map(|element| PathBuf::from(element.text()));
1319
1320 let play_order = nav_point
1321 .get_attr("playOrder")
1322 .and_then(|order| order.parse::<usize>().ok());
1323
1324 let children = self.parse_nav_points(nav_point)?;
1325
1326 nav_points.push(NavPoint { label, content, play_order, children });
1327 }
1328
1329 nav_points.sort();
1330 Ok(nav_points)
1331 }
1332
1333 fn parse_catalog_list(&self, element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1339 let mut catalog = Vec::new();
1340 for item in element.children() {
1341 if item.tag_name() != "li" {
1342 return Err(EpubError::NonCanonicalFile { tag: "li".to_string() });
1343 }
1344
1345 let title_element = item
1346 .find_children_by_names(&["span", "a"])
1347 .next()
1348 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "span/a".to_string() })?;
1349 let content_href = title_element.get_attr("href").map(PathBuf::from);
1350 let sub_list = if let Some(list) = item.find_children_by_name("ol").next() {
1351 self.parse_catalog_list(list)?
1352 } else {
1353 vec![]
1354 };
1355
1356 catalog.push(NavPoint {
1357 label: title_element.text(),
1358 content: content_href,
1359 children: sub_list,
1360 play_order: None,
1361 });
1362 }
1363
1364 Ok(catalog)
1365 }
1366
1367 #[inline]
1384 fn normalize_manifest_path(&self, path: &str) -> Result<PathBuf, EpubError> {
1385 let path = if path.starts_with("../") {
1386 let mut current_dir = self.epub_path.join(&self.package_path);
1387 current_dir.pop();
1388
1389 check_realtive_link_leakage(self.epub_path.clone(), current_dir, path)
1390 .map(PathBuf::from)
1391 .ok_or_else(|| EpubError::RelativeLinkLeakage { path: path.to_string() })?
1392 } else if let Some(stripped) = path.strip_prefix("/") {
1393 PathBuf::from(stripped.to_string())
1394 } else {
1395 self.base_path.join(path)
1396 };
1397
1398 #[cfg(windows)]
1399 let path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
1400
1401 Ok(path)
1402 }
1403
1404 fn validate_fallback_chains(&self) {
1416 for (id, item) in &self.manifest {
1417 if item.fallback.is_none() {
1418 continue;
1419 }
1420
1421 let mut fallback_chain = Vec::new();
1422 if let Err(msg) = self.validate_fallback_chain(id, &mut fallback_chain) {
1423 log::warn!("Invalid fallback chain for item {}: {}", id, msg);
1424 }
1425 }
1426 }
1427
1428 fn validate_fallback_chain(
1442 &self,
1443 manifest_id: &str,
1444 fallback_chain: &mut Vec<String>,
1445 ) -> Result<(), String> {
1446 if fallback_chain.contains(&manifest_id.to_string()) {
1447 fallback_chain.push(manifest_id.to_string());
1448
1449 return Err(format!(
1450 "Circular reference detected in fallback chain for {}",
1451 fallback_chain.join("->")
1452 ));
1453 }
1454
1455 let item = self.manifest.get(manifest_id).unwrap();
1457
1458 if let Some(fallback_id) = &item.fallback {
1459 if !self.manifest.contains_key(fallback_id) {
1460 return Err(format!(
1461 "Fallback resource {} does not exist in manifest",
1462 fallback_id
1463 ));
1464 }
1465
1466 fallback_chain.push(manifest_id.to_string());
1467 self.validate_fallback_chain(fallback_id, fallback_chain)
1468 } else {
1469 Ok(())
1471 }
1472 }
1473
1474 fn is_encryption_file(&self, path: &str) -> Option<String> {
1487 self.encryption.as_ref().and_then(|encryptions| {
1488 encryptions
1489 .iter()
1490 .find(|encryption| encryption.data == path)
1491 .map(|encryption| encryption.method.clone())
1492 })
1493 }
1494
1495 #[inline]
1513 fn auto_dencrypt(&self, method: &str, data: &mut [u8]) -> Result<Vec<u8>, EpubError> {
1514 match method {
1515 "http://www.idpf.org/2008/embedding" => {
1516 Ok(idpf_font_dencryption(data, &self.unique_identifier))
1517 }
1518 "http://ns.adobe.com/pdf/enc#RC" => {
1519 Ok(adobe_font_dencryption(data, &self.unique_identifier))
1520 }
1521 _ => Err(EpubError::UnsupportedEncryptedMethod { method: method.to_string() }),
1522 }
1523 }
1524}
1525
1526impl EpubDoc<BufReader<File>> {
1527 pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
1539 let file = File::open(&path).map_err(EpubError::from)?;
1540 let path = fs::canonicalize(path)?;
1541
1542 Self::from_reader(BufReader::new(file), path)
1543 }
1544
1545 pub fn is_valid_epub<P: AsRef<Path>>(path: P) -> Result<bool, EpubError> {
1560 let result = EpubDoc::new(path);
1561
1562 match result {
1563 Ok(_) => Ok(true),
1564 Err(err) if Self::is_outside_error(&err) => Err(err),
1565 Err(_) => Ok(false),
1566 }
1567 }
1568
1569 fn is_outside_error(err: &EpubError) -> bool {
1588 matches!(
1589 err,
1590 EpubError::ArchiveError { .. }
1591 | EpubError::IOError { .. }
1592 | EpubError::MutexError
1593 | EpubError::Utf8DecodeError { .. }
1594 | EpubError::Utf16DecodeError { .. }
1595 | EpubError::QuickXmlError { .. }
1596 )
1597 }
1598}
1599
1600#[cfg(test)]
1601mod tests {
1602 use std::{
1603 fs::File,
1604 io::BufReader,
1605 path::{Path, PathBuf},
1606 };
1607
1608 use crate::{epub::EpubDoc, error::EpubError, utils::XmlReader};
1609
1610 mod package_documents_tests {
1612 use std::{path::Path, sync::atomic::Ordering};
1613
1614 use crate::epub::{EpubDoc, EpubVersion};
1615
1616 #[test]
1620 fn test_pkg_collections_unknown() {
1621 let epub_file = Path::new("./test_case/pkg-collections-unknown.epub");
1622 let doc = EpubDoc::new(epub_file);
1623 assert!(doc.is_ok());
1624 }
1625
1626 #[test]
1630 fn test_pkg_creator_order() {
1631 let epub_file = Path::new("./test_case/pkg-creator-order.epub");
1632 let doc = EpubDoc::new(epub_file);
1633 assert!(doc.is_ok());
1634
1635 let doc = doc.unwrap();
1636 let creators = doc.get_metadata_value("creator");
1637 assert!(creators.is_some());
1638
1639 let creators = creators.unwrap();
1640 assert_eq!(creators.len(), 5);
1641 assert_eq!(
1642 creators,
1643 vec![
1644 "Dave Cramer",
1645 "Wendy Reid",
1646 "Dan Lazin",
1647 "Ivan Herman",
1648 "Brady Duga",
1649 ]
1650 );
1651 }
1652
1653 #[test]
1657 fn test_pkg_manifest_order() {
1658 let epub_file = Path::new("./test_case/pkg-manifest-unknown.epub");
1659 let doc = EpubDoc::new(epub_file);
1660 assert!(doc.is_ok());
1661
1662 let doc = doc.unwrap();
1663 assert_eq!(doc.manifest.len(), 2);
1664 assert!(doc.get_manifest_item("nav").is_ok());
1665 assert!(doc.get_manifest_item("content_001").is_ok());
1666 assert!(doc.get_manifest_item("content_002").is_err());
1667 }
1668
1669 #[test]
1673 fn test_pkg_meta_unknown() {
1674 let epub_file = Path::new("./test_case/pkg-meta-unknown.epub");
1675 let doc = EpubDoc::new(epub_file);
1676 assert!(doc.is_ok());
1677
1678 let doc = doc.unwrap();
1679 let value = doc.get_metadata_value("dcterms:isReferencedBy");
1680 assert!(value.is_some());
1681 let value = value.unwrap();
1682 assert_eq!(value.len(), 1);
1683 assert_eq!(
1684 value,
1685 vec!["https://www.w3.org/TR/epub-rs/#confreq-rs-pkg-meta-unknown"]
1686 );
1687
1688 let value = doc.get_metadata_value("dcterms:modified");
1689 assert!(value.is_some());
1690 let value = value.unwrap();
1691 assert_eq!(value.len(), 1);
1692 assert_eq!(value, vec!["2021-01-11T00:00:00Z"]);
1693
1694 let value = doc.get_metadata_value("dcterms:title");
1695 assert!(value.is_none());
1696 }
1697
/// Opens `pkg-meta-whitespace.epub` and checks that `creator` and `description` each
/// resolve to exactly one value matching the expected, single-spaced text.
#[test]
fn test_pkg_meta_white_space() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-meta-whitespace.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    // (property, the single value it must carry)
    let single_valued = [
        ("creator", "Dave Cramer"),
        (
            "description",
            "The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases.",
        ),
    ];
    for (property, only_value) in single_valued {
        let found = doc.get_metadata_value(property);
        assert!(found.is_some());
        let found = found.unwrap();
        assert_eq!(found.len(), 1);
        assert_eq!(found, vec![only_value]);
    }
}
1725
/// Opens `pkg-spine-duplicate-item-hyperlink.epub` and checks that the spine has four
/// entries and that navigating to each index yields the same result as fetching the
/// expected manifest item (`content_001` once, then `content_002` three times).
#[test]
fn test_pkg_spine_duplicate_item_hyperlink() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pkg-spine-duplicate-item-hyperlink.epub",
    ));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    assert_eq!(doc.spine.len(), 4);

    // (spine index, manifest id expected at that position)
    let expected_at = [
        (0, "content_001"),
        (1, "content_002"),
        (2, "content_002"),
        (3, "content_002"),
    ];
    for (index, manifest_id) in expected_at {
        assert_eq!(
            doc.navigate_by_spine_index(index).unwrap(),
            doc.get_manifest_item(manifest_id).unwrap()
        );
    }
}
1754
/// Opens `pkg-spine-duplicate-item-rendering.epub` (four spine entries) and walks the
/// spine: stepping back from the start yields nothing, the first step forward yields an
/// item, and the fourth step forward yields nothing.
#[test]
fn test_pkg_spine_duplicate_item_rendering() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pkg-spine-duplicate-item-rendering.epub",
    ));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    assert_eq!(doc.spine.len(), 4);

    assert!(doc.spine_prev().is_none());
    assert!(doc.spine_next().is_some());

    // Two further steps whose results are not inspected.
    for _ in 0..2 {
        doc.spine_next();
    }
    assert!(doc.spine_next().is_none());
}
1778
/// Opens `pkg-spine-nonlinear-activation.epub` and checks that neither `spine_prev` nor
/// `spine_next` yields an item, both before and after navigating directly to spine index 1
/// (which itself succeeds).
#[test]
fn test_pkg_spine_nonlinear_activation() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-nonlinear-activation.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let assert_cannot_step = || {
        assert!(doc.spine_prev().is_none());
        assert!(doc.spine_next().is_none());
    };

    assert_cannot_step();
    assert!(doc.navigate_by_spine_index(1).is_some());
    assert_cannot_step();
}
1796
/// Opens `pkg-spine-order.epub` and checks that the four spine entries keep the expected
/// `idref` order (`d-`, `c-`, `b-`, `a-`).
#[test]
fn test_pkg_spine_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-order.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    assert_eq!(doc.spine.len(), 4);

    let idrefs: Vec<&str> = doc.spine.iter().map(|entry| entry.idref.as_str()).collect();
    assert_eq!(
        idrefs,
        vec![
            "d-content_001",
            "c-content_002",
            "b-content_003",
            "a-content_004",
        ]
    );
}
1821
/// Opens `pkg-spine-order-svg.epub` (four spine entries) and steps forward with
/// `spine_next` until it yields nothing. After every step, the returned item must equal
/// the manifest item referenced by the spine entry at the current spine index; once the
/// walk ends the current index must be 3.
#[test]
fn test_spine_order_svg() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-order-svg.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    assert_eq!(doc.spine.len(), 4);

    while let Some(spine) = doc.spine_next() {
        let position = doc.current_spine_index.load(Ordering::Relaxed);
        let resource = doc.get_manifest_item(&doc.spine[position].idref);
        assert!(resource.is_ok());
        assert_eq!(spine, resource.unwrap());
    }

    assert_eq!(doc.current_spine_index.load(Ordering::Relaxed), 3);
}
1851
/// Opens `pkg-spine-unknown.epub` and checks its single spine entry: `idref` is
/// `content_001`, there is no `id`, `linear` is true, and `properties` carries the value
/// `untrustworthy`.
#[test]
fn test_pkg_spine_unknown() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-unknown.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    assert_eq!(doc.spine.len(), 1);

    let entry = &doc.spine[0];
    assert_eq!(entry.idref, "content_001");
    assert_eq!(entry.id, None);
    assert!(entry.linear);
    assert_eq!(entry.properties, Some("untrustworthy".to_string()));
}
1868
/// Opens `pkg-title-order.epub` and checks that `get_title` returns the six titles in the
/// expected order, with `pkg-title-order` first.
#[test]
fn test_pkg_title_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-title-order.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let titles = doc.get_title();
    assert_eq!(titles.len(), 6);

    let expected = vec![
        "pkg-title-order",
        "This title must not display first",
        "Also, this title must not display first",
        "This title also must not display first",
        "This title must also not display first",
        "This title must not display first, also",
    ];
    assert_eq!(titles, expected);
}
1893
/// Opens `pkg-unique-id.epub` and `pkg-unique-id_duplicate.epub` and checks that both
/// report the same identifier and that `unique_identifier` is `pkg-unique-id` for each.
#[test]
fn test_pkg_unique_id() {
    let [doc_1, doc_2] = [
        "./test_case/pkg-unique-id.epub",
        "./test_case/pkg-unique-id_duplicate.epub",
    ]
    .map(|location| {
        let opened = EpubDoc::new(Path::new(location));
        assert!(opened.is_ok());
        opened.unwrap()
    });

    assert_eq!(doc_1.get_identifier(), doc_2.get_identifier());
    for doc in [&doc_1, &doc_2] {
        assert_eq!(doc.unique_identifier, "pkg-unique-id");
    }
}
1914
/// Opens `pkg-version-backward.epub` and checks that the parsed version is
/// `EpubVersion::Version3_0`.
#[test]
fn test_pkg_version_backward() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-version-backward.epub"));
    assert!(opened.is_ok());

    let parsed_version = opened.unwrap().version;
    assert_eq!(parsed_version, EpubVersion::Version3_0);
}
1927
/// Opens `pkg-linked-records.epub` and checks that it exposes three metadata links, at
/// least one of which has `properties` equal to `onix`.
#[test]
fn test_pkg_linked_records() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-linked-records.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    assert_eq!(doc.metadata_link.len(), 3);

    let has_onix_link = doc
        .metadata_link
        .iter()
        .any(|link| matches!(&link.properties, Some(properties) if properties.eq("onix")));
    assert!(has_onix_link);
}
1949
/// Opens `pkg-manifest-unlisted-resource.epub` and checks path-based lookup:
/// `EPUB/content_001.xhtml` resolves, while `EPUB/red.png` fails with the expected
/// "Resource not found" message.
#[test]
fn test_pkg_manifest_unlisted_resource() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-manifest-unlisted-resource.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let listed = doc.get_manifest_item_by_path("EPUB/content_001.xhtml");
    assert!(listed.is_ok());

    let lookup_unlisted = || doc.get_manifest_item_by_path("EPUB/red.png");
    assert!(lookup_unlisted().is_err());

    let message = lookup_unlisted().unwrap_err().to_string();
    assert_eq!(
        message,
        "Resource not found: Unable to find resource from \"EPUB/red.png\"."
    );
}
1972 }
1973
1974 mod manifest_fallbacks_tests {
1978 use std::path::Path;
1979
1980 use crate::epub::EpubDoc;
1981
/// Opens `pub-foreign_bad-fallback.epub`, checks that `content_001` and `bar` are both
/// retrievable, and that asking for `content_001` with only `application/xhtml+xml`
/// supported fails with the "No supported file format" message.
#[test]
fn test_pub_foreign_bad_fallback() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_bad-fallback.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    for id in ["content_001", "bar"] {
        assert!(doc.get_manifest_item(id).is_ok());
    }

    let unsupported =
        doc.get_manifest_item_with_fallback("content_001", &vec!["application/xhtml+xml"]);
    assert_eq!(
        unsupported.unwrap_err().to_string(),
        "No supported file format: The fallback resource does not contain the file format you support."
    );
}
2002
/// Opens `pub-foreign_image.epub` and checks that resolving `image-tiff` with
/// `image/png` and `application/xhtml+xml` supported succeeds and reports `image/png`.
#[test]
fn test_pub_foreign_image() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_image.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let supported = vec!["image/png", "application/xhtml+xml"];
    let resolved = doc.get_manifest_item_with_fallback("image-tiff", &supported);
    assert!(resolved.is_ok());

    let (_, mime) = resolved.unwrap();
    assert_eq!(mime, "image/png");
}
2022
/// Opens `pub-foreign_json-spine.epub` and resolves `content_primary` twice: with
/// `application/json` also supported the result is `application/json`; with only
/// `application/xhtml+xml` supported the result is `application/xhtml+xml`.
#[test]
fn test_pub_foreign_json_spine() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_json-spine.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    // (supported formats, MIME type the lookup must report)
    let cases = [
        (
            vec!["application/xhtml+xml", "application/json"],
            "application/json",
        ),
        (vec!["application/xhtml+xml"], "application/xhtml+xml"),
    ];
    for (supported, expected_mime) in cases {
        let resolved = doc.get_manifest_item_with_fallback("content_primary", &supported);
        assert!(resolved.is_ok());
        let (_, mime) = resolved.unwrap();
        assert_eq!(mime, expected_mime);
    }
}
2047
/// Opens `pub-foreign_xml-spine.epub` and resolves `content_primary` twice: with
/// `application/xml` also supported the result is `application/xml`; with only
/// `application/xhtml+xml` supported the result is `application/xhtml+xml`.
#[test]
fn test_pub_foreign_xml_spine() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_xml-spine.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    // (supported formats, MIME type the lookup must report)
    let cases = [
        (
            vec!["application/xhtml+xml", "application/xml"],
            "application/xml",
        ),
        (vec!["application/xhtml+xml"], "application/xhtml+xml"),
    ];
    for (supported, expected_mime) in cases {
        let resolved = doc.get_manifest_item_with_fallback("content_primary", &supported);
        assert!(resolved.is_ok());
        let (_, mime) = resolved.unwrap();
        assert_eq!(mime, expected_mime);
    }
}
2072
/// Opens `pub-foreign_xml-suffix-spine.epub` and resolves `content_primary` twice: with
/// `application/dtc+xml` also supported the result is `application/dtc+xml`; with only
/// `application/xhtml+xml` supported the result is `application/xhtml+xml`.
#[test]
fn test_pub_foreign_xml_suffix_spine() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_xml-suffix-spine.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    // (supported formats, MIME type the lookup must report)
    let cases = [
        (
            vec!["application/xhtml+xml", "application/dtc+xml"],
            "application/dtc+xml",
        ),
        (vec!["application/xhtml+xml"], "application/xhtml+xml"),
    ];
    for (supported, expected_mime) in cases {
        let resolved = doc.get_manifest_item_with_fallback("content_primary", &supported);
        assert!(resolved.is_ok());
        let (_, mime) = resolved.unwrap();
        assert_eq!(mime, expected_mime);
    }
}
2097 }
2098
2099 mod open_container_format_tests {
2101 use std::{cmp::min, io::Read, path::Path};
2102
2103 use sha1::{Digest, Sha1};
2104
2105 use crate::epub::EpubDoc;
2106
/// Checks that `ocf-metainf-inc.epub` opens without error.
#[test]
fn test_ocf_metainf_inc() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-metainf-inc.epub"));
    assert!(opened.is_ok());
}
2116
/// Checks that `ocf-metainf-manifest.epub` opens without error.
#[test]
fn test_ocf_metainf_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-metainf-manifest.epub"));
    assert!(opened.is_ok());
}
2126
/// Opens `ocf-package_arbitrary.epub` and checks that the package document path is
/// `FOO/BAR/package.opf`.
#[test]
fn test_ocf_package_arbitrary() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_arbitrary.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.package_path, Path::new("FOO/BAR/package.opf"));
}
2139
/// Opens `ocf-package_multiple.epub` and checks that the package document path is
/// `FOO/BAR/package.opf` and the base path is `FOO/BAR`.
#[test]
fn test_ocf_package_multiple() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_multiple.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.package_path, Path::new("FOO/BAR/package.opf"));
    assert_eq!(book.base_path, Path::new("FOO/BAR"));
}
2153
/// Checks that opening `ocf-url_link-leaking-relative.epub` fails with the
/// "Relative link leakage" error naming the offending path.
#[test]
fn test_ocf_url_link_leaking_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-leaking-relative.epub"));
    assert!(opened.is_err());

    let message = opened.err().unwrap().to_string();
    assert_eq!(
        message,
        "Relative link leakage: Path \"../../../../media/imgs/monastery.jpg\" is out of container range."
    );
}
2169
/// Opens `ocf-url_link-path-absolute.epub` and checks that the manifest entry `photo`
/// has the path `media/imgs/monastery.jpg`.
#[test]
fn test_ocf_url_link_path_absolute() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-path-absolute.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let photo = book.manifest.get("photo").unwrap();
    assert_eq!(photo.path, Path::new("media/imgs/monastery.jpg"));
}
2183
/// Opens `ocf-url_link-relative.epub` and checks that the manifest entry `photo` has the
/// path `media/imgs/monastery.jpg`.
#[test]
fn test_ocf_url_link_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-relative.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let photo = book.manifest.get("photo").unwrap();
    assert_eq!(photo.path, Path::new("media/imgs/monastery.jpg"));
}
2197
/// Opens `ocf-url_manifest.epub` and checks that `nav` and `content_001` are retrievable
/// while `content_002` is reported as an error.
#[test]
fn test_ocf_url_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_manifest.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    for present in ["nav", "content_001"] {
        assert!(book.get_manifest_item(present).is_ok());
    }
    assert!(book.get_manifest_item("content_002").is_err());
}
2212
/// Opens `ocf-url_relative.epub` and checks the resolved paths: package document
/// `foo/BAR/baz.opf`, base path `foo/BAR`, and the manifest paths of `nav` and
/// `content_001`. Both items must also be retrievable.
#[test]
fn test_ocf_url_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_relative.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    assert_eq!(book.package_path, Path::new("foo/BAR/baz.opf"));
    assert_eq!(book.base_path, Path::new("foo/BAR"));

    let nav = book.manifest.get("nav").unwrap();
    assert_eq!(nav.path, Path::new("foo/BAR/nav.xhtml"));

    let first_chapter = book.manifest.get("content_001").unwrap();
    assert_eq!(
        first_chapter.path,
        Path::new("foo/BAR/qux/content_001.xhtml")
    );

    for id in ["nav", "content_001"] {
        assert!(book.get_manifest_item(id).is_ok());
    }
}
2236
/// Checks that `ocf-zip-comp.epub` opens without error.
#[test]
fn test_ocf_zip_comp() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-zip-comp.epub"));
    assert!(opened.is_ok());
}
2247
/// Checks that `ocf-zip-mult.epub` opens without error.
#[test]
fn test_ocf_zip_mult() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-zip-mult.epub"));
    assert!(opened.is_ok());
}
2258
/// Opens `ocf-font_obfuscation.epub`, reads the single encrypted resource straight from
/// the archive, XORs its first (up to) 1040 bytes with a key built from the SHA-1 digest
/// of the document's unique identifier, and checks that the result starts with a known
/// font signature.
#[test]
fn test_ocf_font_obfuscation() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-font_obfuscation.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    // Key: the digest bytes repeated cyclically until 1040 bytes are available.
    let mut hasher = Sha1::new();
    hasher.update(doc.unique_identifier.as_bytes());
    let hash = hasher.finalize();
    let key: Vec<u8> = hash.iter().copied().cycle().take(1040).collect();

    assert!(doc.encryption.is_some());
    // Borrow the list instead of moving it out of `doc`.
    let encryption = doc.encryption.as_ref().unwrap();
    assert_eq!(encryption.len(), 1);

    let data = &encryption[0];
    assert_eq!(data.method, "http://www.idpf.org/2008/embedding");

    let font_file = doc
        .archive
        .lock()
        .unwrap()
        .by_name(&data.data)
        .unwrap()
        .bytes()
        .collect::<Result<Vec<u8>, _>>();
    assert!(font_file.is_ok());
    let mut deobfuscated = font_file.unwrap();

    // Only the leading bytes are XOR-ed; files shorter than 1040 bytes are handled whole.
    let span = min(1040, deobfuscated.len());
    for (byte, mask) in deobfuscated[..span].iter_mut().zip(&key) {
        *byte ^= *mask;
    }

    assert!(is_valid_font(&deobfuscated));
}
2304
/// Opens `ocf-font_obfuscation_bis.epub`, reads the single encrypted resource straight
/// from the archive, XORs its first (up to) 1040 bytes with a key built from the SHA-1
/// digest of a deliberately wrong identifier, and checks that the result does NOT start
/// with a known font signature.
#[test]
fn test_ocf_font_obfuscation_bis() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-font_obfuscation_bis.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    // Key: the digest bytes repeated cyclically until 1040 bytes are available.
    let wrong_unique_id = "wrong-publication-id";
    let mut hasher = Sha1::new();
    hasher.update(wrong_unique_id.as_bytes());
    let hash = hasher.finalize();
    let wrong_key: Vec<u8> = hash.iter().copied().cycle().take(1040).collect();

    assert!(doc.encryption.is_some());
    // Borrow the list instead of moving it out of `doc`.
    let encryption = doc.encryption.as_ref().unwrap();
    assert_eq!(encryption.len(), 1);

    let data = &encryption[0];
    assert_eq!(data.method, "http://www.idpf.org/2008/embedding");

    let font_file = doc
        .archive
        .lock()
        .unwrap()
        .by_name(&data.data)
        .unwrap()
        .bytes()
        .collect::<Result<Vec<u8>, _>>();
    assert!(font_file.is_ok());
    let mut deobfuscated_with_wrong_key = font_file.unwrap();

    // Only the leading bytes are XOR-ed; files shorter than 1040 bytes are handled whole.
    let span = min(1040, deobfuscated_with_wrong_key.len());
    for (byte, mask) in deobfuscated_with_wrong_key[..span]
        .iter_mut()
        .zip(&wrong_key)
    {
        *byte ^= *mask;
    }

    assert!(!is_valid_font(&deobfuscated_with_wrong_key));
}
2350
/// Returns `true` when `data` is at least four bytes long and its first four bytes equal
/// one of the signatures accepted here: `OTTO`, `00 01 00 00`, `00 02 00 00`, `true`, or
/// `typ1`. Shorter inputs are never valid.
fn is_valid_font(data: &[u8]) -> bool {
    const KNOWN_SIGNATURES: [&[u8]; 5] = [
        b"OTTO",
        b"\x00\x01\x00\x00",
        b"\x00\x02\x00\x00",
        b"true",
        b"typ1",
    ];

    match data.get(..4) {
        Some(signature) => KNOWN_SIGNATURES.iter().any(|known| *known == signature),
        None => false,
    }
}
2364 }
2365
/// Exercises `parse_container` on three hand-written container documents: one with an
/// empty `<rootfiles>` (expects `NonCanonicalFile` for `rootfile`), one whose `<rootfile>`
/// lacks `full-path` (expects `MissingRequiredAttribute`), and a complete one (expects the
/// path `EPUB/content.opf`). It also checks up front that `ocf-zip-mult.epub` opens.
#[test]
fn test_parse_container() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-zip-mult.epub"));
    assert!(opened.is_ok());

    let parse = |xml: &str| EpubDoc::<BufReader<File>>::parse_container(xml.to_string());

    let without_rootfile = parse(
        r#"
        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
        <rootfiles></rootfiles>
        </container>
        "#,
    );
    assert!(without_rootfile.is_err());
    assert_eq!(
        without_rootfile.unwrap_err(),
        EpubError::NonCanonicalFile { tag: "rootfile".to_string() }
    );

    let without_full_path = parse(
        r#"
        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
        <rootfiles>
        <rootfile media-type="application/oebps-package+xml"/>
        </rootfiles>
        </container>
        "#,
    );
    assert!(without_full_path.is_err());
    assert_eq!(
        without_full_path.unwrap_err(),
        EpubError::MissingRequiredAttribute {
            tag: "rootfile".to_string(),
            attribute: "full-path".to_string(),
        }
    );

    let complete = parse(
        r#"
        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
        <rootfiles>
        <rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
        </rootfiles>
        </container>
        "#,
    );
    assert!(complete.is_ok());
    assert_eq!(complete.unwrap(), PathBuf::from("EPUB/content.opf"))
}
2419
/// Exercises `parse_manifest` on hand-written `<manifest>` fragments using a document
/// opened from `ocf-package_multiple.epub`. Fragments whose items lack `id`, `href`, or
/// `media-type` must each fail with `MissingRequiredAttribute` for that attribute; a
/// fragment with all three present must parse successfully.
#[test]
fn test_parse_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_multiple.epub"));
    assert!(opened.is_ok());
    let mut doc = opened.unwrap();

    // (manifest fragment, attribute whose absence must be reported)
    let rejected = [
        (
            r#"
            <manifest>
            <item href="content_001.xhtml" media-type="application/xhtml+xml"/>
            <item properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
            </manifest>
            "#,
            "id",
        ),
        (
            r#"
            <manifest>
            <item id="content_001" media-type="application/xhtml+xml"/>
            <item id="nav" properties="nav" media-type="application/xhtml+xml"/>
            </manifest>
            "#,
            "href",
        ),
        (
            r#"
            <manifest>
            <item id="content_001" href="content_001.xhtml"/>
            <item id="nav" properties="nav" href="nav.xhtml"/>
            </manifest>
            "#,
            "media-type",
        ),
    ];

    for (manifest, missing_attribute) in rejected {
        let element = XmlReader::parse(manifest);
        assert!(element.is_ok());
        let element = element.unwrap();

        let outcome = doc.parse_manifest(&element);
        assert!(outcome.is_err());
        assert_eq!(
            outcome.unwrap_err(),
            EpubError::MissingRequiredAttribute {
                tag: "item".to_string(),
                attribute: missing_attribute.to_string(),
            },
        );
    }

    let complete = r#"
        <manifest>
        <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
        <item id="nav" properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
        </manifest>
        "#;
    let element = XmlReader::parse(complete);
    assert!(element.is_ok());
    let element = element.unwrap();

    assert!(doc.parse_manifest(&element).is_ok());
}
2500
/// Opens `ocf-font_obfuscation.epub` and checks that `has_encryption` reports `true`.
#[test]
fn test_fn_has_encryption() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-font_obfuscation.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert!(book.has_encryption());
}
2511
/// Opens `ocf-font_obfuscation.epub` and checks the parsed encryption data: one entry,
/// with method `http://www.idpf.org/2008/embedding` and data `EPUB/fonts/Lobster.ttf`.
#[test]
fn test_fn_parse_encryption() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-font_obfuscation.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    assert!(book.encryption.is_some());
    let entries = book.encryption.unwrap();
    assert_eq!(entries.len(), 1);

    let only_entry = &entries[0];
    assert_eq!(only_entry.method, "http://www.idpf.org/2008/embedding");
    assert_eq!(only_entry.data, "EPUB/fonts/Lobster.ttf");
}
2527
/// Opens `epub-33.epub` and checks that `get_metadata` returns exactly one item for
/// `title` (`EPUB 3.3`) and one for `language` (`en-us`), and that `get_language` agrees.
#[test]
fn test_get_metadata_existing_key() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    // (property, the value of its single metadata item)
    for (property, expected_value) in [("title", "EPUB 3.3"), ("language", "en-us")] {
        let items = doc.get_metadata(property);
        assert!(items.is_some());

        let items = items.unwrap();
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].property, property);
        assert_eq!(items[0].value, expected_value);
    }

    assert_eq!(doc.get_language(), vec!["en-us"]);
}
2555
/// Opens `epub-33.epub` and checks that `get_metadata` returns `None` for the property
/// `nonexistent`.
#[test]
fn test_get_metadata_nonexistent_key() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert!(book.get_metadata("nonexistent").is_none());
}
2566
/// Opens `epub-33.epub` and checks that `get_metadata("creator")` returns three items in
/// order, each with the expected `id`, the property `creator`, and the expected value.
#[test]
fn test_get_metadata_multiple_items_same_type() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let creators = doc.get_metadata("creator");
    assert!(creators.is_some());

    let creators = creators.unwrap();
    assert_eq!(creators.len(), 3);

    // (id, value) in the order the items must appear.
    let expected = [
        ("creator_id_0", "Matt Garrish, DAISY Consortium"),
        ("creator_id_1", "Ivan Herman, W3C"),
        ("creator_id_2", "Dave Cramer, Invited Expert"),
    ];
    for (creator, (id, value)) in creators.iter().zip(expected) {
        assert_eq!(creator.id, Some(id.to_string()));
        assert_eq!(creator.property, "creator");
        assert_eq!(creator.value, value);
    }
}
2593
/// Opens `epub-33.epub` and checks that the single `title` metadata item carries exactly
/// one refinement, `title-type` = `main`.
#[test]
fn test_get_metadata_with_refinement() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let titles = doc.get_metadata("title");
    assert!(titles.is_some());

    let titles = titles.unwrap();
    assert_eq!(titles.len(), 1);

    let refinements = &titles[0].refined;
    assert_eq!(refinements.len(), 1);
    assert_eq!(refinements[0].property, "title-type");
    assert_eq!(refinements[0].value, "main");
}
2611
/// Opens `pub-foreign_bad-fallback.epub` and checks `get_manifest_item_with_fallback` for
/// `content_001`: with `image/psd` supported it succeeds and reports `image/psd`; with
/// only `application/xhtml+xml` supported it fails with the "No supported file format"
/// message. `content_001` and `bar` must both be retrievable beforehand.
#[test]
fn test_get_manifest_item_with_fallback() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_bad-fallback.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    for id in ["content_001", "bar"] {
        assert!(doc.get_manifest_item(id).is_ok());
    }

    match doc.get_manifest_item_with_fallback("content_001", &vec!["image/psd"]) {
        Ok((_, mime)) => assert_eq!(mime, "image/psd"),
        // An explicit panic replaces `assert!(false, ..)`; the message is unchanged.
        Err(_) => panic!("get_manifest_item_with_fallback failed"),
    }

    let unsupported =
        doc.get_manifest_item_with_fallback("content_001", &vec!["application/xhtml+xml"]);
    assert_eq!(
        unsupported.unwrap_err().to_string(),
        "No supported file format: The fallback resource does not contain the file format you support."
    );
}
2639
/// Opens `pkg-cover-image.epub` (printing the error first if opening fails) and checks
/// that `get_cover` returns 5785 bytes of data with MIME type `image/jpeg`.
#[test]
fn test_get_cover() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-cover-image.epub"));
    // Print the failure reason before the assertion below reports it.
    if let Err(reason) = &opened {
        println!("{}", reason);
    }
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let cover = book.get_cover();
    assert!(cover.is_some());

    let (bytes, mime) = cover.unwrap();
    assert_eq!(bytes.len(), 5785);
    assert_eq!(mime, "image/jpeg");
}
2657
/// Opens `epub-2.epub` and checks that its only title is `Minimal EPUB 2.0`.
#[test]
fn test_epub_2() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-2.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.get_title(), vec!["Minimal EPUB 2.0"]);
}
2669
/// Checks that `is_valid_epub` succeeds and reports `true` for `epub-2.epub`.
#[test]
fn test_is_valid_epub_valid_file() {
    let verdict = EpubDoc::is_valid_epub("./test_case/epub-2.epub");
    assert!(verdict.is_ok());
    assert!(verdict.unwrap());
}
2676
/// Checks that `is_valid_epub` returns an error for the path
/// `./test_case/nonexistent.epub`.
#[test]
fn test_is_valid_epub_invalid_path() {
    let verdict = EpubDoc::is_valid_epub("./test_case/nonexistent.epub");
    assert!(verdict.is_err());
}
2682
/// Writes a file that is not a ZIP archive into the system temp directory and checks that
/// `is_valid_epub` rejects it with `EpubError::ArchiveError`.
#[test]
fn test_is_valid_epub_corrupted_zip() {
    // The process id keeps concurrent runs sharing one temp directory from overwriting or
    // deleting each other's fixture.
    let corrupted_file =
        std::env::temp_dir().join(format!("corrupted-{}.epub", std::process::id()));

    std::fs::write(&corrupted_file, b"not a valid zip file").unwrap();

    let result = EpubDoc::is_valid_epub(&corrupted_file);

    // Clean up before asserting so a failed assertion cannot leave the file behind.
    std::fs::remove_file(&corrupted_file).ok();

    assert!(result.is_err());
    let err = result.unwrap_err();
    assert!(matches!(err, EpubError::ArchiveError { .. }));
}
2698
/// Checks that `is_valid_epub` succeeds and reports `true` for `epub-33.epub`.
#[test]
fn test_is_valid_epub_valid_epub_3() {
    let verdict = EpubDoc::is_valid_epub("./test_case/epub-33.epub");
    assert!(verdict.is_ok());
    assert!(verdict.unwrap());
}
2705
/// Checks how `is_outside_error` classifies four errors: `ArchiveError` and `IOError` are
/// reported as outside errors, `NonCanonicalEpub` and `MissingRequiredAttribute` are not.
#[test]
fn test_is_outside_error() {
    type FileDoc = EpubDoc<BufReader<File>>;

    let outside = [
        EpubError::ArchiveError {
            source: zip::result::ZipError::Io(std::io::Error::new(
                std::io::ErrorKind::Other,
                "test",
            )),
        },
        EpubError::IOError {
            source: std::io::Error::new(std::io::ErrorKind::NotFound, "test"),
        },
    ];
    for error in &outside {
        assert!(FileDoc::is_outside_error(error));
    }

    let not_outside = [
        EpubError::NonCanonicalEpub { expected_file: "test".to_string() },
        EpubError::MissingRequiredAttribute {
            tag: "test".to_string(),
            attribute: "id".to_string(),
        },
    ];
    for error in &not_outside {
        assert!(!FileDoc::is_outside_error(error));
    }
}
2732
2733 mod metadata_sheet_tests {
2734 use crate::epub::EpubDoc;
2735 use std::path::Path;
2736
/// Opens `epub-33.epub` and checks the metadata sheet's title, language, publisher, and
/// rights fields against their expected values.
#[test]
fn test_get_metadata_sheet_basic_fields() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let sheet = book.get_metadata_sheet();

    // Fields holding a list: exactly one entry each.
    assert_eq!(sheet.title.len(), 1);
    assert_eq!(sheet.title[0], "EPUB 3.3");
    assert_eq!(sheet.language.len(), 1);
    assert_eq!(sheet.language[0], "en-us");

    // Fields holding a single value.
    assert_eq!(sheet.publisher, "World Wide Web Consortium");
    assert_eq!(
        sheet.rights,
        "https://www.w3.org/Consortium/Legal/2015/doc-license"
    );
}
2759
/// Opens `epub-33.epub` and checks that the metadata sheet lists the three expected
/// creators in order.
#[test]
fn test_get_metadata_sheet_multiple_creators() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let sheet = book.get_metadata_sheet();
    assert_eq!(sheet.creator.len(), 3);

    let expected = [
        "Matt Garrish, DAISY Consortium",
        "Ivan Herman, W3C",
        "Dave Cramer, Invited Expert",
    ];
    for (position, name) in expected.into_iter().enumerate() {
        assert_eq!(sheet.creator[position], name);
    }
}
2774
/// Opens `epub-33.epub` and checks that the metadata sheet lists the two expected
/// subjects in order.
#[test]
fn test_get_metadata_sheet_multiple_subjects() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let sheet = book.get_metadata_sheet();
    assert_eq!(sheet.subject.len(), 2);

    let expected = [
        "Information systems~World Wide Web",
        "General and reference~Computing standards, RFCs and guidelines",
    ];
    for (position, subject) in expected.into_iter().enumerate() {
        assert_eq!(sheet.subject[position], subject);
    }
}
2791
/// Opens `epub-33.epub` and checks that the metadata sheet's identifier map holds the key
/// `pub-id` with the value `https://www.w3.org/TR/epub-33/`.
#[test]
fn test_get_metadata_sheet_identifier_with_id() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let sheet = book.get_metadata_sheet();
    assert!(sheet.identifier.contains_key("pub-id"));

    let expected = "https://www.w3.org/TR/epub-33/".to_string();
    assert_eq!(sheet.identifier.get("pub-id"), Some(&expected));
}
2807
/// Opens `epub-33.epub` and checks that the metadata sheet fields `coverage`,
/// `description`, `format`, `source`, `epub_type`, `contributor`, and `relation` are all
/// empty.
#[test]
fn test_get_metadata_sheet_missing_scalar_fields() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let sheet = book.get_metadata_sheet();

    let emptiness = [
        sheet.coverage.is_empty(),
        sheet.description.is_empty(),
        sheet.format.is_empty(),
        sheet.source.is_empty(),
        sheet.epub_type.is_empty(),
        sheet.contributor.is_empty(),
        sheet.relation.is_empty(),
    ];
    for is_empty in emptiness {
        assert!(is_empty);
    }
}
2825
/// Opens `epub-33.epub` and checks the title from two angles: `get_metadata("title")`
/// returns one item refined by `title-type` = `main`, and the metadata sheet lists the
/// single title `EPUB 3.3`.
#[test]
fn test_get_metadata_sheet_title_refinement_via_get_metadata() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let titles = book.get_metadata("title");
    assert!(titles.is_some());

    let titles = titles.unwrap();
    assert_eq!(titles.len(), 1);

    let refinements = &titles[0].refined;
    assert_eq!(refinements.len(), 1);
    assert_eq!(refinements[0].property, "title-type");
    assert_eq!(refinements[0].value, "main");

    let sheet = book.get_metadata_sheet();
    assert_eq!(sheet.title.len(), 1);
    assert_eq!(sheet.title[0], "EPUB 3.3");
}
2846
/// Opens `epub-33.epub` and checks the entry counts on the metadata sheet: one title,
/// three creators, two subjects.
#[test]
fn test_get_metadata_sheet_ignores_unknown_properties() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let sheet = book.get_metadata_sheet();

    let counts = (
        sheet.title.len(),
        sheet.creator.len(),
        sheet.subject.len(),
    );
    assert_eq!(counts.0, 1);
    assert_eq!(counts.1, 3);
    assert_eq!(counts.2, 2);
}
2860
/// Opens `epub-33.epub`, requests the metadata sheet twice, and checks that `title`,
/// `creator`, `language`, `identifier`, and `date` are equal between the two results.
#[test]
fn test_get_metadata_sheet_idempotent() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let first = book.get_metadata_sheet();
    let second = book.get_metadata_sheet();

    assert_eq!(first.title, second.title);
    assert_eq!(first.creator, second.creator);
    assert_eq!(first.language, second.language);
    assert_eq!(first.identifier, second.identifier);
    assert_eq!(first.date, second.date);
}
2877 }
2878}