1use std::{
24 collections::HashMap,
25 fs::{self, File},
26 io::{BufReader, Read, Seek},
27 path::{Path, PathBuf},
28 sync::{
29 Arc, Mutex,
30 atomic::{AtomicUsize, Ordering},
31 },
32};
33
34#[cfg(not(feature = "no-indexmap"))]
35use indexmap::IndexMap;
36use zip::{ZipArchive, result::ZipError};
37
38use crate::{
39 error::EpubError,
40 types::{
41 EncryptionData, EpubVersion, ManifestItem, MetadataItem, MetadataLinkItem,
42 MetadataRefinement, MetadataSheet, NavPoint, SpineItem,
43 },
44 utils::{
45 DecodeBytes, NormalizeWhitespace, XmlElement, XmlReader, adobe_font_dencryption,
46 check_realtive_link_leakage, compression_method_check, get_file_in_zip_archive,
47 idpf_font_dencryption,
48 },
49};
50
/// An opened EPUB publication backed by a ZIP archive read from `R`.
///
/// The fields are filled in by [`EpubDoc::from_reader`], which parses the container file and
/// the package document stored inside the archive.
pub struct EpubDoc<R: Read + Seek> {
    /// The underlying ZIP archive, shared behind a mutex.
    pub(crate) archive: Arc<Mutex<ZipArchive<R>>>,

    /// Canonicalized form of the path handed to the constructor.
    pub(crate) epub_path: PathBuf,

    /// Path of the package document, taken from the `full-path` attribute of the container's
    /// `rootfile` element.
    pub package_path: PathBuf,

    /// Parent directory of `package_path`.
    pub base_path: PathBuf,

    /// EPUB version determined from the package document.
    pub version: EpubVersion,

    /// Value of the `identifier` metadata entry selected as the publication's unique
    /// identifier.
    pub unique_identifier: String,

    /// Metadata entries parsed from the `metadata` element of the package document.
    pub metadata: Vec<MetadataItem>,

    /// `link` entries parsed from the `metadata` element of the package document.
    pub metadata_link: Vec<MetadataLinkItem>,

    /// Manifest items keyed by their `id` attribute (`IndexMap` variant, used unless the
    /// `no-indexmap` feature is enabled).
    #[cfg(not(feature = "no-indexmap"))]
    pub manifest: IndexMap<String, ManifestItem>,
    /// Manifest items keyed by their `id` attribute (`HashMap` variant, used when the
    /// `no-indexmap` feature is enabled).
    #[cfg(feature = "no-indexmap")]
    pub manifest: HashMap<String, ManifestItem>,

    /// Spine entries in the order they appear in the package document.
    pub spine: Vec<SpineItem>,

    /// Entries parsed from `META-INF/encryption.xml`; `None` when there are none.
    pub encryption: Option<Vec<EncryptionData>>,

    /// Table of contents parsed from the NCX file (EPUB 2) or navigation document (EPUB 3).
    pub catalog: Vec<NavPoint>,

    /// Title of the table of contents; stays empty when the source provides none.
    pub catalog_title: String,

    /// Index into `spine` of the current reading position; starts at 0.
    current_spine_index: AtomicUsize,

    /// Whether the archive contains a `META-INF/encryption.xml` entry.
    has_encryption: bool,
}
149
150impl<R: Read + Seek> EpubDoc<R> {
151 pub fn from_reader(reader: R, epub_path: PathBuf) -> Result<Self, EpubError> {
171 let mut archive = ZipArchive::new(reader).map_err(EpubError::from)?;
181 let epub_path = fs::canonicalize(epub_path)?;
182
183 compression_method_check(&mut archive)?;
184
185 let container =
186 get_file_in_zip_archive(&mut archive, "META-INF/container.xml")?.decode()?;
187 let package_path = Self::parse_container(container)?;
188 let base_path = package_path
189 .parent()
190 .expect("the parent directory of the opf file must exist")
191 .to_path_buf();
192
193 let opf_file = get_file_in_zip_archive(
194 &mut archive,
195 package_path
196 .to_str()
197 .expect("package_path should be valid UTF-8"),
198 )?
199 .decode()?;
200 let package = XmlReader::parse(&opf_file)?;
201
202 let version = Self::determine_epub_version(&package)?;
203 let has_encryption = archive
204 .by_path(Path::new("META-INF/encryption.xml"))
205 .is_ok();
206
207 let mut doc = Self {
208 archive: Arc::new(Mutex::new(archive)),
209 epub_path,
210 package_path,
211 base_path,
212 version,
213 unique_identifier: String::new(),
214 metadata: vec![],
215 metadata_link: vec![],
216
217 #[cfg(feature = "no-indexmap")]
218 manifest: HashMap::new(),
219 #[cfg(not(feature = "no-indexmap"))]
220 manifest: IndexMap::new(),
221
222 spine: vec![],
223 encryption: None,
224 catalog: vec![],
225 catalog_title: String::new(),
226 current_spine_index: AtomicUsize::new(0),
227 has_encryption,
228 };
229
230 let metadata_element = package.find_elements_by_name("metadata").next().unwrap();
231 let manifest_element = package.find_elements_by_name("manifest").next().unwrap();
232 let spine_element = package.find_elements_by_name("spine").next().unwrap();
233
234 doc.parse_metadata(metadata_element)?;
235 doc.parse_manifest(manifest_element)?;
236 doc.parse_spine(spine_element)?;
237 doc.parse_encryption()?;
238 doc.parse_catalog()?;
239
240 doc.unique_identifier = if let Some(uid) = package.get_attr("unique-identifier") {
242 doc.metadata.iter().find(|item| {
243 item.property == "identifier" && item.id.as_ref().is_some_and(|id| id == &uid)
244 })
245 } else {
246 doc.metadata
247 .iter()
248 .find(|item| item.property == "identifier")
249 }
250 .map(|item| item.value.clone())
251 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "dc:identifier".to_string() })?;
252
253 Ok(doc)
254 }
255
256 fn parse_container(data: String) -> Result<PathBuf, EpubError> {
272 let root = XmlReader::parse(&data)?;
273 let rootfile = root
274 .find_elements_by_name("rootfile")
275 .next()
276 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "rootfile".to_string() })?;
277
278 let attr =
279 rootfile
280 .get_attr("full-path")
281 .ok_or_else(|| EpubError::MissingRequiredAttribute {
282 tag: "rootfile".to_string(),
283 attribute: "full-path".to_string(),
284 })?;
285
286 Ok(PathBuf::from(attr))
287 }
288
289 fn parse_metadata(&mut self, metadata_element: &XmlElement) -> Result<(), EpubError> {
300 const DC_NAMESPACE: &str = "http://purl.org/dc/elements/1.1/";
301 const OPF_NAMESPACE: &str = "http://www.idpf.org/2007/opf";
302
303 let mut metadata = Vec::new();
304 let mut metadata_link = Vec::new();
305 let mut refinements = HashMap::<String, Vec<MetadataRefinement>>::new();
306
307 for element in metadata_element.children() {
308 match &element.namespace {
309 Some(namespace) if namespace == DC_NAMESPACE => {
310 self.parse_dc_metadata(element, &mut metadata)?
311 }
312
313 Some(namespace) if namespace == OPF_NAMESPACE => self.parse_opf_metadata(
314 element,
315 &mut metadata,
316 &mut metadata_link,
317 &mut refinements,
318 )?,
319
320 _ => {}
321 };
322 }
323
324 for item in metadata.iter_mut() {
325 if let Some(id) = &item.id {
326 if let Some(refinements) = refinements.remove(id) {
327 item.refined = refinements;
328 }
329 }
330 }
331
332 self.metadata = metadata;
333 self.metadata_link = metadata_link;
334 Ok(())
335 }
336
337 fn parse_manifest(&mut self, manifest_element: &XmlElement) -> Result<(), EpubError> {
347 let estimated_items = manifest_element.children().count();
348 #[cfg(feature = "no-indexmap")]
349 let mut resources = HashMap::with_capacity(estimated_items);
350 #[cfg(not(feature = "no-indexmap"))]
351 let mut resources = IndexMap::with_capacity(estimated_items);
352
353 for element in manifest_element.children() {
354 let id = element
355 .get_attr("id")
356 .ok_or_else(|| EpubError::MissingRequiredAttribute {
357 tag: element.tag_name(),
358 attribute: "id".to_string(),
359 })?
360 .to_string();
361 let path = element
362 .get_attr("href")
363 .ok_or_else(|| EpubError::MissingRequiredAttribute {
364 tag: element.tag_name(),
365 attribute: "href".to_string(),
366 })?
367 .to_string();
368 let mime = element
369 .get_attr("media-type")
370 .ok_or_else(|| EpubError::MissingRequiredAttribute {
371 tag: element.tag_name(),
372 attribute: "media-type".to_string(),
373 })?
374 .to_string();
375 let properties = element.get_attr("properties");
376 let fallback = element.get_attr("fallback");
377
378 resources.insert(
379 id.clone(),
380 ManifestItem {
381 id,
382 path: self.normalize_manifest_path(&path)?,
383 mime,
384 properties,
385 fallback,
386 },
387 );
388 }
389
390 self.manifest = resources;
391 self.validate_fallback_chains();
392 Ok(())
393 }
394
395 fn parse_spine(&mut self, spine_element: &XmlElement) -> Result<(), EpubError> {
405 let mut spine = Vec::new();
406 for element in spine_element.children() {
407 let idref = element
408 .get_attr("idref")
409 .ok_or_else(|| EpubError::MissingRequiredAttribute {
410 tag: element.tag_name(),
411 attribute: "idref".to_string(),
412 })?
413 .to_string();
414 let id = element.get_attr("id");
415 let linear = element
416 .get_attr("linear")
417 .map(|linear| linear == "yes")
418 .unwrap_or(true);
419 let properties = element.get_attr("properties");
420
421 spine.push(SpineItem { idref, id, linear, properties });
422 }
423
424 self.spine = spine;
425 Ok(())
426 }
427
428 fn parse_encryption(&mut self) -> Result<(), EpubError> {
438 if !self.has_encryption() {
439 return Ok(());
440 }
441
442 let mut archive = self.archive.lock()?;
443 let encryption_file =
444 get_file_in_zip_archive(&mut archive, "META-INF/encryption.xml")?.decode()?;
445
446 let root = XmlReader::parse(&encryption_file)?;
447
448 let mut encryption_data = Vec::new();
449 for data in root.children() {
450 if data.name != "EncryptedData" {
451 continue;
452 }
453
454 let method = data
455 .find_elements_by_name("EncryptionMethod")
456 .next()
457 .ok_or_else(|| EpubError::NonCanonicalFile {
458 tag: "EncryptionMethod".to_string(),
459 })?;
460 let reference = data
461 .find_elements_by_name("CipherReference")
462 .next()
463 .ok_or_else(|| EpubError::NonCanonicalFile {
464 tag: "CipherReference".to_string(),
465 })?;
466
467 encryption_data.push(EncryptionData {
468 method: method
469 .get_attr("Algorithm")
470 .ok_or_else(|| EpubError::MissingRequiredAttribute {
471 tag: "EncryptionMethod".to_string(),
472 attribute: "Algorithm".to_string(),
473 })?
474 .to_string(),
475 data: reference
476 .get_attr("URI")
477 .ok_or_else(|| EpubError::MissingRequiredAttribute {
478 tag: "CipherReference".to_string(),
479 attribute: "URI".to_string(),
480 })?
481 .to_string(),
482 });
483 }
484
485 if !encryption_data.is_empty() {
486 self.encryption = Some(encryption_data);
487 }
488
489 Ok(())
490 }
491
/// Builds the table of contents (`self.catalog`) and its title (`self.catalog_title`).
///
/// - EPUB 2: the package document is read again to obtain the `toc` attribute of `spine`;
///   the manifest item with that id is parsed as an NCX file (`docTitle`, `navMap`).
/// - EPUB 3: the first manifest item whose `properties` contain `nav` is parsed, and the
///   `nav` element with `epub:type="toc"` supplies the title (first `h1`..`h6` child) and
///   the entries (first `ol` child).
///
/// # Errors
/// Returns an error when a required file, element or attribute is missing, or when reading
/// or parsing one of the files fails.
///
/// # Panics
/// Panics if one of the involved paths is not valid UTF-8 (`to_str().unwrap()`).
fn parse_catalog(&mut self) -> Result<(), EpubError> {
    // Heading tags accepted as the title of the EPUB 3 navigation list.
    const HEAD_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];

    // The guard is held for the whole function; both branches read from the archive.
    let mut archive = self.archive.lock()?;
    match self.version {
        EpubVersion::Version2_0 => {
            let opf_file =
                get_file_in_zip_archive(&mut archive, self.package_path.to_str().unwrap())?
                    .decode()?;
            let opf_element = XmlReader::parse(&opf_file)?;

            // The `toc` attribute of `spine` holds the manifest id of the NCX file.
            let toc_id = opf_element
                .find_children_by_name("spine")
                .next()
                .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?
                .get_attr("toc")
                .ok_or_else(|| EpubError::MissingRequiredAttribute {
                    tag: "spine".to_string(),
                    attribute: "toc".to_string(),
                })?
                .to_owned();
            let toc_path = self
                .manifest
                .get(&toc_id)
                .ok_or(EpubError::ResourceIdNotExist { id: toc_id })?
                .path
                .to_str()
                .unwrap();

            let ncx_file = get_file_in_zip_archive(&mut archive, toc_path)?.decode()?;
            let ncx = XmlReader::parse(&ncx_file)?;

            // A missing `docTitle` is tolerated: it is logged and the title stays unchanged.
            match ncx.find_elements_by_name("docTitle").next() {
                Some(element) => self.catalog_title = element.text(),
                None => log::warn!(
                    "Expecting to get docTitle information from the ncx file, but it's missing."
                ),
            };

            let nav_map = ncx
                .find_elements_by_name("navMap")
                .next()
                .ok_or_else(|| EpubError::NonCanonicalFile { tag: "navMap".to_string() })?;

            self.catalog = self.parse_nav_points(nav_map)?;

            Ok(())
        }

        EpubVersion::Version3_0 => {
            // NOTE(review): `contains("nav")` is a substring test on the whole `properties`
            // value, not a whitespace-separated token match; confirm this is intended.
            let nav_path = self
                .manifest
                .values()
                .find(|item| {
                    if let Some(property) = &item.properties {
                        return property.contains("nav");
                    }
                    false
                })
                .map(|item| item.path.clone())
                .ok_or_else(|| EpubError::NonCanonicalEpub {
                    expected_file: "Navigation Document".to_string(),
                })?;

            let nav_file =
                get_file_in_zip_archive(&mut archive, nav_path.to_str().unwrap())?.decode()?;

            let nav_element = XmlReader::parse(&nav_file)?;
            let nav = nav_element
                .find_elements_by_name("nav")
                .find(|&element| element.get_attr("epub:type") == Some(String::from("toc")))
                .ok_or_else(|| EpubError::NonCanonicalFile { tag: "nav".to_string() })?;
            let nav_title = nav.find_children_by_names(&HEAD_TAGS).next();
            let nav_list = nav
                .find_children_by_name("ol")
                .next()
                .ok_or_else(|| EpubError::NonCanonicalFile { tag: "ol".to_string() })?;

            self.catalog = self.parse_catalog_list(nav_list)?;
            // The heading is optional; without one the title stays unchanged.
            if let Some(nav_title) = nav_title {
                self.catalog_title = nav_title.text();
            };
            Ok(())
        }
    }
}
584
/// Returns the `has_encryption` flag, which `from_reader` sets to whether the archive
/// contains a `META-INF/encryption.xml` entry.
#[inline]
pub fn has_encryption(&self) -> bool {
    self.has_encryption
}
604
605 pub fn get_metadata(&self, key: &str) -> Option<Vec<MetadataItem>> {
619 let metadatas = self
620 .metadata
621 .iter()
622 .filter(|item| item.property == key)
623 .cloned()
624 .collect::<Vec<MetadataItem>>();
625
626 (!metadatas.is_empty()).then_some(metadatas)
627 }
628
629 pub fn get_metadata_value(&self, key: &str) -> Option<Vec<String>> {
641 let values = self
642 .metadata
643 .iter()
644 .filter(|item| item.property == key)
645 .map(|item| item.value.clone())
646 .collect::<Vec<String>>();
647
648 (!values.is_empty()).then_some(values)
649 }
650
651 #[inline]
664 pub fn get_title(&self) -> Vec<String> {
665 self.get_metadata_value("title")
666 .expect("missing required 'title' metadata which is required by the EPUB specification")
667 }
668
669 #[inline]
683 pub fn get_language(&self) -> Vec<String> {
684 self.get_metadata_value("language").expect(
685 "missing required 'language' metadata which is required by the EPUB specification",
686 )
687 }
688
689 #[inline]
705 pub fn get_identifier(&self) -> Vec<String> {
706 self.get_metadata_value("identifier").expect(
707 "missing required 'identifier' metadata which is required by the EPUB specification",
708 )
709 }
710
711 pub fn get_metadata_sheet(&self) -> MetadataSheet {
725 let mut sheet = MetadataSheet::new();
726 for item in &self.metadata {
727 let value = item.value.clone();
728
729 match item.property.as_str() {
730 "title" => {
731 sheet.title.push(value);
732 }
733 "creator" => {
734 sheet.creator.push(value);
735 }
736 "contributor" => {
737 sheet.contributor.push(value);
738 }
739 "subject" => {
740 sheet.subject.push(value);
741 }
742 "language" => {
743 sheet.language.push(value);
744 }
745 "relation" => {
746 sheet.relation.push(value);
747 }
748 "date" => {
749 let event = item
750 .refined
751 .iter()
752 .filter_map(|refine| {
753 if refine.property.eq("event") {
754 Some(refine.value.clone())
755 } else {
756 None
757 }
758 })
759 .next()
760 .unwrap_or_default();
761 sheet.date.insert(value, event);
762 }
763 "identifier" => {
764 let id = item.id.clone().unwrap_or_default();
765 sheet.identifier.insert(id, value);
766 }
767 "description" => {
768 sheet.description = value;
769 }
770 "format" => {
771 sheet.format = value;
772 }
773 "publisher" => {
774 sheet.publisher = value;
775 }
776 "rights" => {
777 sheet.rights = value;
778 }
779 "source" => {
780 sheet.source = value;
781 }
782 "ccoverage" => {
783 sheet.coverage = value;
784 }
785 "type" => {
786 sheet.epub_type = value;
787 }
788 _ => {}
789 };
790 }
791
792 sheet
793 }
794
795 pub fn get_manifest_item(&self, id: &str) -> Result<(Vec<u8>, String), EpubError> {
812 let resource_item = self
813 .manifest
814 .get(id)
815 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
816
817 self.get_resource(resource_item)
818 }
819
820 pub fn get_manifest_item_by_path(&self, path: &str) -> Result<(Vec<u8>, String), EpubError> {
839 let manifest = self
840 .manifest
841 .iter()
842 .find(|(_, item)| item.path.to_str().unwrap() == path)
843 .map(|(_, manifest)| manifest)
844 .ok_or_else(|| EpubError::ResourceNotFound { resource: path.to_string() })?;
845
846 self.get_resource(manifest)
847 }
848
849 pub fn get_manifest_item_with_fallback(
865 &self,
866 id: &str,
867 supported_format: &[&str],
868 ) -> Result<(Vec<u8>, String), EpubError> {
869 let mut current_id = id;
870 let mut fallback_chain = Vec::<&str>::new();
871 'fallback: loop {
872 let manifest_item = self
873 .manifest
874 .get(current_id)
875 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
876
877 if supported_format.contains(&manifest_item.mime.as_str()) {
878 return self.get_resource(manifest_item);
879 }
880
881 let fallback_id = match &manifest_item.fallback {
882 None => break 'fallback,
884
885 Some(id) if fallback_chain.contains(&id.as_str()) => break 'fallback,
887
888 Some(id) => {
889 fallback_chain.push(id.as_str());
890
891 id.as_str()
895 }
896 };
897
898 current_id = fallback_id;
899 }
900
901 Err(EpubError::NoSupportedFileFormat)
902 }
903
904 pub fn get_cover(&self) -> Option<(Vec<u8>, String)> {
921 self.manifest
922 .values()
923 .filter(|manifest| {
924 manifest.id.to_ascii_lowercase().contains("cover")
925 || manifest
926 .properties
927 .as_ref()
928 .map(|properties| properties.to_ascii_lowercase().contains("cover"))
929 .unwrap_or(false)
930 })
931 .find_map(|manifest| {
932 self.get_resource(manifest)
933 .map_err(|err| log::warn!("{err}"))
934 .ok()
935 })
936 }
937
938 fn get_resource(&self, resource_item: &ManifestItem) -> Result<(Vec<u8>, String), EpubError> {
940 let path = resource_item
941 .path
942 .to_str()
943 .expect("manifest item path should be valid UTF-8");
944
945 let mut archive = self.archive.lock()?;
946 let mut data = match archive.by_name(path) {
947 Ok(mut file) => {
948 let mut entry = Vec::<u8>::new();
949 file.read_to_end(&mut entry)?;
950 Ok(entry)
951 }
952 Err(ZipError::FileNotFound) => {
953 Err(EpubError::ResourceNotFound { resource: path.to_string() })
954 }
955 Err(err) => Err(EpubError::from(err)),
956 }?;
957
958 if let Some(method) = self.is_encryption_file(path) {
959 data = self.auto_dencrypt(&method, &mut data)?;
960 }
961
962 Ok((data, resource_item.mime.clone()))
963 }
964
965 pub fn navigate_by_spine_index(&mut self, index: usize) -> Option<(Vec<u8>, String)> {
984 if index >= self.spine.len() {
985 return None;
986 }
987
988 let manifest_id = self.spine[index].idref.as_ref();
989 self.current_spine_index.store(index, Ordering::SeqCst);
990 self.get_manifest_item(manifest_id)
991 .map_err(|err| log::warn!("{err}"))
992 .ok()
993 }
994
995 pub fn spine_prev(&self) -> Option<(Vec<u8>, String)> {
1007 let current_index = self.current_spine_index.load(Ordering::SeqCst);
1008 if current_index == 0 || !self.spine[current_index].linear {
1009 return None;
1010 }
1011
1012 let prev_index = (0..current_index)
1013 .rev()
1014 .find(|&index| self.spine[index].linear)?;
1015
1016 self.current_spine_index.store(prev_index, Ordering::SeqCst);
1017 let manifest_id = self.spine[prev_index].idref.as_ref();
1018 self.get_manifest_item(manifest_id)
1019 .map_err(|err| log::warn!("{err}"))
1020 .ok()
1021 }
1022
1023 pub fn spine_next(&mut self) -> Option<(Vec<u8>, String)> {
1035 let current_index = self.current_spine_index.load(Ordering::SeqCst);
1036 if current_index >= self.spine.len() - 1 || !self.spine[current_index].linear {
1037 return None;
1038 }
1039
1040 let next_index =
1041 (current_index + 1..self.spine.len()).find(|&index| self.spine[index].linear)?;
1042
1043 self.current_spine_index.store(next_index, Ordering::SeqCst);
1044 let manifest_id = self.spine[next_index].idref.as_ref();
1045 self.get_manifest_item(manifest_id)
1046 .map_err(|err| log::warn!("{err}"))
1047 .ok()
1048 }
1049
1050 pub fn spine_current(&self) -> Option<(Vec<u8>, String)> {
1060 let manifest_id = self.spine[self.current_spine_index.load(Ordering::SeqCst)]
1061 .idref
1062 .as_ref();
1063 self.get_manifest_item(manifest_id)
1064 .map_err(|err| log::warn!("{err}"))
1065 .ok()
1066 }
1067
1068 fn determine_epub_version(opf_element: &XmlElement) -> Result<EpubVersion, EpubError> {
1078 if let Some(version) = opf_element.get_attr("version") {
1080 match version.as_str() {
1081 "2.0" => return Ok(EpubVersion::Version2_0),
1082 "3.0" => return Ok(EpubVersion::Version3_0),
1083 _ => {}
1084 }
1085 }
1086
1087 let spine_element = opf_element
1088 .find_elements_by_name("spine")
1089 .next()
1090 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?;
1091
1092 if spine_element.get_attr("toc").is_some() {
1094 return Ok(EpubVersion::Version2_0);
1095 }
1096
1097 let manifest_element = opf_element
1098 .find_elements_by_name("manifest")
1099 .next()
1100 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "manifest".to_string() })?;
1101
1102 manifest_element
1104 .children()
1105 .find_map(|element| {
1106 if let Some(id) = element.get_attr("id") {
1107 if id.eq("nav") {
1108 return Some(EpubVersion::Version3_0);
1109 }
1110 }
1111
1112 None
1113 })
1114 .ok_or(EpubError::UnrecognizedEpubVersion)
1115 }
1116
1117 #[inline]
1127 fn parse_dc_metadata(
1128 &self,
1129 element: &XmlElement,
1130 metadata: &mut Vec<MetadataItem>,
1131 ) -> Result<(), EpubError> {
1133 let id = element.get_attr("id");
1134 let lang = element.get_attr("lang");
1135 let property = element.name.clone();
1136 let value = element.text().normalize_whitespace();
1137
1138 let refined = match self.version {
1139 EpubVersion::Version2_0 => element
1142 .attributes
1143 .iter()
1144 .map(|(name, value)| {
1145 let property = name.to_string();
1146 let value = value.to_string().normalize_whitespace();
1147
1148 MetadataRefinement {
1149 refines: id.clone().unwrap(),
1150 property,
1151 value,
1152 lang: None,
1153 scheme: None,
1154 }
1155 })
1156 .collect(),
1157 EpubVersion::Version3_0 => vec![],
1158 };
1159
1160 metadata.push(MetadataItem { id, property, value, lang, refined });
1161
1162 Ok(())
1163 }
1164
1165 #[inline]
1176 fn parse_opf_metadata(
1177 &self,
1178 element: &XmlElement,
1179 metadata: &mut Vec<MetadataItem>,
1180 metadata_link: &mut Vec<MetadataLinkItem>,
1181 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1182 ) -> Result<(), EpubError> {
1183 match element.name.as_str() {
1184 "meta" => self.parse_meta_element(element, metadata, refinements),
1185 "link" => self.parse_link_element(element, metadata_link),
1186 _ => Ok(()),
1187 }
1188 }
1189
1190 #[inline]
1191 fn parse_meta_element(
1192 &self,
1193 element: &XmlElement,
1194 metadata: &mut Vec<MetadataItem>,
1195 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1196 ) -> Result<(), EpubError> {
1197 match self.version {
1198 EpubVersion::Version2_0 => {
1199 let property = element
1200 .get_attr("name")
1201 .ok_or_else(|| EpubError::NonCanonicalFile { tag: element.tag_name() })?;
1202 let value = element
1203 .get_attr("content")
1204 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1205 tag: element.tag_name(),
1206 attribute: "content".to_string(),
1207 })?
1208 .normalize_whitespace();
1209
1210 metadata.push(MetadataItem {
1211 id: None,
1212 property,
1213 value,
1214 lang: None,
1215 refined: vec![],
1216 });
1217 }
1218
1219 EpubVersion::Version3_0 => {
1220 let property = element.get_attr("property").ok_or_else(|| {
1221 EpubError::MissingRequiredAttribute {
1222 tag: element.tag_name(),
1223 attribute: "property".to_string(),
1224 }
1225 })?;
1226 let value = element.text().normalize_whitespace();
1227 let lang = element.get_attr("lang");
1228
1229 if let Some(refines) = element.get_attr("refines") {
1230 let id = refines.strip_prefix("#").unwrap_or(&refines).to_string();
1231 let scheme = element.get_attr("scheme");
1232 let refinement = MetadataRefinement {
1233 refines: id.clone(),
1234 property,
1235 value,
1236 lang,
1237 scheme,
1238 };
1239
1240 if let Some(refinements) = refinements.get_mut(&id) {
1241 refinements.push(refinement);
1242 } else {
1243 refinements.insert(id, vec![refinement]);
1244 }
1245 } else {
1246 let id = element.get_attr("id");
1247 let item = MetadataItem {
1248 id,
1249 property,
1250 value,
1251 lang,
1252 refined: vec![],
1253 };
1254
1255 metadata.push(item);
1256 };
1257 }
1258 }
1259 Ok(())
1260 }
1261
1262 #[inline]
1263 fn parse_link_element(
1264 &self,
1265 element: &XmlElement,
1266 metadata_link: &mut Vec<MetadataLinkItem>,
1267 ) -> Result<(), EpubError> {
1268 let href = element
1269 .get_attr("href")
1270 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1271 tag: element.tag_name(),
1272 attribute: "href".to_string(),
1273 })?;
1274 let rel = element
1275 .get_attr("rel")
1276 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1277 tag: element.tag_name(),
1278 attribute: "rel".to_string(),
1279 })?;
1280 let hreflang = element.get_attr("hreflang");
1281 let id = element.get_attr("id");
1282 let mime = element.get_attr("media-type");
1283 let properties = element.get_attr("properties");
1284
1285 metadata_link.push(MetadataLinkItem {
1286 href,
1287 rel,
1288 hreflang,
1289 id,
1290 mime,
1291 properties,
1292 refines: None,
1293 });
1294 Ok(())
1295 }
1296
1297 fn parse_nav_points(&self, parent_element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1303 let mut nav_points = Vec::new();
1304 for nav_point in parent_element.find_children_by_name("navPoint") {
1305 let label = match nav_point.find_children_by_name("navLabel").next() {
1306 Some(element) => element.text(),
1307 None => String::new(),
1308 };
1309
1310 let content = nav_point
1311 .find_children_by_name("content")
1312 .next()
1313 .map(|element| PathBuf::from(element.text()));
1314
1315 let play_order = nav_point
1316 .get_attr("playOrder")
1317 .and_then(|order| order.parse::<usize>().ok());
1318
1319 let children = self.parse_nav_points(nav_point)?;
1320
1321 nav_points.push(NavPoint { label, content, play_order, children });
1322 }
1323
1324 nav_points.sort();
1325 Ok(nav_points)
1326 }
1327
1328 fn parse_catalog_list(&self, element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1334 let mut catalog = Vec::new();
1335 for item in element.children() {
1336 if item.tag_name() != "li" {
1337 return Err(EpubError::NonCanonicalFile { tag: "li".to_string() });
1338 }
1339
1340 let title_element = item
1341 .find_children_by_names(&["span", "a"])
1342 .next()
1343 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "span/a".to_string() })?;
1344 let content_href = title_element.get_attr("href").map(PathBuf::from);
1345 let sub_list = if let Some(list) = item.find_children_by_name("ol").next() {
1346 self.parse_catalog_list(list)?
1347 } else {
1348 vec![]
1349 };
1350
1351 catalog.push(NavPoint {
1352 label: title_element.text(),
1353 content: content_href,
1354 children: sub_list,
1355 play_order: None,
1356 });
1357 }
1358
1359 Ok(catalog)
1360 }
1361
1362 #[inline]
1379 fn normalize_manifest_path(&self, path: &str) -> Result<PathBuf, EpubError> {
1380 let mut path = if path.starts_with("../") {
1381 let mut current_dir = self.epub_path.join(&self.package_path);
1382 current_dir.pop();
1383
1384 check_realtive_link_leakage(self.epub_path.clone(), current_dir, path)
1385 .map(PathBuf::from)
1386 .ok_or_else(|| EpubError::RelativeLinkLeakage { path: path.to_string() })?
1387 } else if let Some(path) = path.strip_prefix("/") {
1388 PathBuf::from(path.to_string())
1389 } else {
1390 self.base_path.join(path)
1391 };
1392
1393 #[cfg(windows)]
1394 {
1395 path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
1396 }
1397
1398 Ok(path)
1399 }
1400
1401 fn validate_fallback_chains(&self) {
1413 for (id, item) in &self.manifest {
1414 if item.fallback.is_none() {
1415 continue;
1416 }
1417
1418 let mut fallback_chain = Vec::new();
1419 if let Err(msg) = self.validate_fallback_chain(id, &mut fallback_chain) {
1420 log::warn!("Invalid fallback chain for item {}: {}", id, msg);
1421 }
1422 }
1423 }
1424
1425 fn validate_fallback_chain(
1439 &self,
1440 manifest_id: &str,
1441 fallback_chain: &mut Vec<String>,
1442 ) -> Result<(), String> {
1443 if fallback_chain.contains(&manifest_id.to_string()) {
1444 fallback_chain.push(manifest_id.to_string());
1445
1446 return Err(format!(
1447 "Circular reference detected in fallback chain for {}",
1448 fallback_chain.join("->")
1449 ));
1450 }
1451
1452 let item = self.manifest.get(manifest_id).unwrap();
1454
1455 if let Some(fallback_id) = &item.fallback {
1456 if !self.manifest.contains_key(fallback_id) {
1457 return Err(format!(
1458 "Fallback resource {} does not exist in manifest",
1459 fallback_id
1460 ));
1461 }
1462
1463 fallback_chain.push(manifest_id.to_string());
1464 self.validate_fallback_chain(fallback_id, fallback_chain)
1465 } else {
1466 Ok(())
1468 }
1469 }
1470
1471 fn is_encryption_file(&self, path: &str) -> Option<String> {
1484 self.encryption.as_ref().and_then(|encryptions| {
1485 encryptions
1486 .iter()
1487 .find(|encryption| encryption.data == path)
1488 .map(|encryption| encryption.method.clone())
1489 })
1490 }
1491
1492 #[inline]
1510 fn auto_dencrypt(&self, method: &str, data: &mut [u8]) -> Result<Vec<u8>, EpubError> {
1511 match method {
1512 "http://www.idpf.org/2008/embedding" => {
1513 Ok(idpf_font_dencryption(data, &self.unique_identifier))
1514 }
1515 "http://ns.adobe.com/pdf/enc#RC" => {
1516 Ok(adobe_font_dencryption(data, &self.unique_identifier))
1517 }
1518 _ => Err(EpubError::UnsupportedEncryptedMethod { method: method.to_string() }),
1519 }
1520 }
1521}
1522
1523impl EpubDoc<BufReader<File>> {
1524 pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
1536 let file = File::open(&path).map_err(EpubError::from)?;
1537 let path = fs::canonicalize(path)?;
1538
1539 Self::from_reader(BufReader::new(file), path)
1540 }
1541
1542 pub fn is_valid_epub<P: AsRef<Path>>(path: P) -> Result<bool, EpubError> {
1557 let result = EpubDoc::new(path);
1558
1559 match result {
1560 Ok(_) => Ok(true),
1561 Err(err) if Self::is_outside_error(&err) => Err(err),
1562 Err(_) => Ok(false),
1563 }
1564 }
1565
1566 fn is_outside_error(err: &EpubError) -> bool {
1585 matches!(
1586 err,
1587 EpubError::ArchiveError { .. }
1588 | EpubError::IOError { .. }
1589 | EpubError::MutexError
1590 | EpubError::Utf8DecodeError { .. }
1591 | EpubError::Utf16DecodeError { .. }
1592 | EpubError::QuickXmlError { .. }
1593 )
1594 }
1595}
1596
1597#[cfg(test)]
1598mod tests {
1599 use std::{
1600 fs::File,
1601 io::BufReader,
1602 path::{Path, PathBuf},
1603 };
1604
1605 use crate::{epub::EpubDoc, error::EpubError, utils::XmlReader};
1606
1607 mod package_documents_tests {
1609 use std::{path::Path, sync::atomic::Ordering};
1610
1611 use crate::epub::{EpubDoc, EpubVersion};
1612
// Opening the `pkg-collections-unknown` fixture must succeed.
#[test]
fn test_pkg_collections_unknown() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-collections-unknown.epub"));
    assert!(opened.is_ok());
}
1622
// The `creator` values must come back in the expected order.
#[test]
fn test_pkg_creator_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-creator-order.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let creators = doc.get_metadata_value("creator");
    assert!(creators.is_some());
    let creators = creators.unwrap();

    let expected = vec![
        "Dave Cramer",
        "Wendy Reid",
        "Dan Lazin",
        "Ivan Herman",
        "Brady Duga",
    ];
    assert_eq!(creators.len(), 5);
    assert_eq!(creators, expected);
}
1649
// The fixture's manifest holds exactly two readable items: `nav` and `content_001`.
#[test]
fn test_pkg_manifest_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-manifest-unknown.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    assert_eq!(doc.manifest.len(), 2);
    for present in ["nav", "content_001"] {
        assert!(doc.get_manifest_item(present).is_ok());
    }
    assert!(doc.get_manifest_item("content_002").is_err());
}
1665
// `meta` entries with the listed properties are exposed through their property name; an
// absent property yields `None`.
#[test]
fn test_pkg_meta_unknown() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-meta-unknown.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let referenced_by = doc.get_metadata_value("dcterms:isReferencedBy");
    assert!(referenced_by.is_some());
    let referenced_by = referenced_by.unwrap();
    assert_eq!(referenced_by.len(), 1);
    assert_eq!(
        referenced_by,
        vec!["https://www.w3.org/TR/epub-rs/#confreq-rs-pkg-meta-unknown"]
    );

    let modified = doc.get_metadata_value("dcterms:modified");
    assert!(modified.is_some());
    let modified = modified.unwrap();
    assert_eq!(modified.len(), 1);
    assert_eq!(modified, vec!["2021-01-11T00:00:00Z"]);

    assert!(doc.get_metadata_value("dcterms:title").is_none());
}
1694
// Metadata values must have their whitespace normalized.
#[test]
fn test_pkg_meta_white_space() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-meta-whitespace.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let creators = doc.get_metadata_value("creator");
    assert!(creators.is_some());
    let creators = creators.unwrap();
    assert_eq!(creators.len(), 1);
    assert_eq!(creators, vec!["Dave Cramer"]);

    let descriptions = doc.get_metadata_value("description");
    assert!(descriptions.is_some());
    let descriptions = descriptions.unwrap();
    assert_eq!(descriptions.len(), 1);
    assert_eq!(
        descriptions,
        vec![
            "The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases."
        ]
    );
}
1722
1723 #[test]
1727 fn test_pkg_spine_duplicate_item_hyperlink() {
1728 let epub_file = Path::new("./test_case/pkg-spine-duplicate-item-hyperlink.epub");
1729 let doc = EpubDoc::new(epub_file);
1730 assert!(doc.is_ok());
1731
1732 let mut doc = doc.unwrap();
1733 assert_eq!(doc.spine.len(), 4);
1734 assert_eq!(
1735 doc.navigate_by_spine_index(0).unwrap(),
1736 doc.get_manifest_item("content_001").unwrap()
1737 );
1738 assert_eq!(
1739 doc.navigate_by_spine_index(1).unwrap(),
1740 doc.get_manifest_item("content_002").unwrap()
1741 );
1742 assert_eq!(
1743 doc.navigate_by_spine_index(2).unwrap(),
1744 doc.get_manifest_item("content_002").unwrap()
1745 );
1746 assert_eq!(
1747 doc.navigate_by_spine_index(3).unwrap(),
1748 doc.get_manifest_item("content_002").unwrap()
1749 );
1750 }
1751
1752 #[test]
1756 fn test_pkg_spine_duplicate_item_rendering() {
1757 let epub_file = Path::new("./test_case/pkg-spine-duplicate-item-rendering.epub");
1758 let doc = EpubDoc::new(epub_file);
1759 assert!(doc.is_ok());
1760
1761 let mut doc = doc.unwrap();
1762 assert_eq!(doc.spine.len(), 4);
1763
1764 let result = doc.spine_prev();
1765 assert!(result.is_none());
1766
1767 let result = doc.spine_next();
1768 assert!(result.is_some());
1769
1770 doc.spine_next();
1771 doc.spine_next();
1772 let result = doc.spine_next();
1773 assert!(result.is_none());
1774 }
1775
1776 #[test]
1780 fn test_pkg_spine_nonlinear_activation() {
1781 let epub_file = Path::new("./test_case/pkg-spine-nonlinear-activation.epub");
1782 let doc = EpubDoc::new(epub_file);
1783 assert!(doc.is_ok());
1784
1785 let mut doc = doc.unwrap();
1786 assert!(doc.spine_prev().is_none());
1787 assert!(doc.spine_next().is_none());
1788
1789 assert!(doc.navigate_by_spine_index(1).is_some());
1790 assert!(doc.spine_prev().is_none());
1791 assert!(doc.spine_next().is_none());
1792 }
1793
1794 #[test]
1798 fn test_pkg_spine_order() {
1799 let epub_file = Path::new("./test_case/pkg-spine-order.epub");
1800 let doc = EpubDoc::new(epub_file);
1801 assert!(doc.is_ok());
1802
1803 let doc = doc.unwrap();
1804 assert_eq!(doc.spine.len(), 4);
1805 assert_eq!(
1806 doc.spine
1807 .iter()
1808 .map(|item| item.idref.clone())
1809 .collect::<Vec<String>>(),
1810 vec![
1811 "d-content_001",
1812 "c-content_002",
1813 "b-content_003",
1814 "a-content_004",
1815 ]
1816 );
1817 }
1818
1819 #[test]
1823 fn test_spine_order_svg() {
1824 let epub_file = Path::new("./test_case/pkg-spine-order-svg.epub");
1825 let doc = EpubDoc::new(epub_file);
1826 assert!(doc.is_ok());
1827
1828 let mut doc = doc.unwrap();
1829 assert_eq!(doc.spine.len(), 4);
1830
1831 loop {
1832 if let Some(spine) = doc.spine_next() {
1833 let idref = doc.spine[doc.current_spine_index.load(Ordering::Relaxed)]
1834 .idref
1835 .clone();
1836 let resource = doc.get_manifest_item(&idref);
1837 assert!(resource.is_ok());
1838
1839 let resource = resource.unwrap();
1840 assert_eq!(spine, resource);
1841 } else {
1842 break;
1843 }
1844 }
1845
1846 assert_eq!(doc.current_spine_index.load(Ordering::Relaxed), 3);
1847 }
1848
1849 #[test]
1853 fn test_pkg_spine_unknown() {
1854 let epub_file = Path::new("./test_case/pkg-spine-unknown.epub");
1855 let doc = EpubDoc::new(epub_file);
1856 assert!(doc.is_ok());
1857
1858 let doc = doc.unwrap();
1859 assert_eq!(doc.spine.len(), 1);
1860 assert_eq!(doc.spine[0].idref, "content_001");
1861 assert_eq!(doc.spine[0].id, None);
1862 assert_eq!(doc.spine[0].linear, true);
1863 assert_eq!(doc.spine[0].properties, Some("untrustworthy".to_string()));
1864 }
1865
1866 #[test]
1870 fn test_pkg_title_order() {
1871 let epub_file = Path::new("./test_case/pkg-title-order.epub");
1872 let doc = EpubDoc::new(epub_file);
1873 assert!(doc.is_ok());
1874
1875 let doc = doc.unwrap();
1876 let title_list = doc.get_title();
1877 assert_eq!(title_list.len(), 6);
1878 assert_eq!(
1879 title_list,
1880 vec![
1881 "pkg-title-order",
1882 "This title must not display first",
1883 "Also, this title must not display first",
1884 "This title also must not display first",
1885 "This title must also not display first",
1886 "This title must not display first, also",
1887 ]
1888 );
1889 }
1890
1891 #[test]
1895 fn test_pkg_unique_id() {
1896 let epub_file = Path::new("./test_case/pkg-unique-id.epub");
1897 let doc_1 = EpubDoc::new(epub_file);
1898 assert!(doc_1.is_ok());
1899
1900 let epub_file = Path::new("./test_case/pkg-unique-id_duplicate.epub");
1901 let doc_2 = EpubDoc::new(epub_file);
1902 assert!(doc_2.is_ok());
1903
1904 let doc_1 = doc_1.unwrap();
1905 let doc_2 = doc_2.unwrap();
1906
1907 assert_eq!(doc_1.get_identifier(), doc_2.get_identifier());
1908 assert_eq!(doc_1.unique_identifier, "pkg-unique-id");
1909 assert_eq!(doc_2.unique_identifier, "pkg-unique-id");
1910 }
1911
1912 #[test]
1916 fn test_pkg_version_backward() {
1917 let epub_file = Path::new("./test_case/pkg-version-backward.epub");
1918 let doc = EpubDoc::new(epub_file);
1919 assert!(doc.is_ok());
1920
1921 let doc = doc.unwrap();
1922 assert_eq!(doc.version, EpubVersion::Version3_0);
1923 }
1924
1925 #[test]
1929 fn test_pkg_linked_records() {
1930 let epub_file = Path::new("./test_case/pkg-linked-records.epub");
1931 let doc = EpubDoc::new(epub_file);
1932 assert!(doc.is_ok());
1933
1934 let doc = doc.unwrap();
1935 assert_eq!(doc.metadata_link.len(), 3);
1936
1937 let item = doc.metadata_link.iter().find(|&item| {
1938 if let Some(properties) = &item.properties {
1939 properties.eq("onix")
1940 } else {
1941 false
1942 }
1943 });
1944 assert!(item.is_some());
1945 }
1946
1947 #[test]
1951 fn test_pkg_manifest_unlisted_resource() {
1952 let epub_file = Path::new("./test_case/pkg-manifest-unlisted-resource.epub");
1953 let doc = EpubDoc::new(epub_file);
1954 assert!(doc.is_ok());
1955
1956 let doc = doc.unwrap();
1957 assert!(
1958 doc.get_manifest_item_by_path("EPUB/content_001.xhtml")
1959 .is_ok()
1960 );
1961
1962 assert!(doc.get_manifest_item_by_path("EPUB/red.png").is_err());
1963 let err = doc.get_manifest_item_by_path("EPUB/red.png").unwrap_err();
1964 assert_eq!(
1965 err.to_string(),
1966 "Resource not found: Unable to find resource from \"EPUB/red.png\"."
1967 );
1968 }
1969 }
1970
1971 mod manifest_fallbacks_tests {
1975 use std::path::Path;
1976
1977 use crate::epub::EpubDoc;
1978
1979 #[test]
1983 fn test_pub_foreign_bad_fallback() {
1984 let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
1985 let doc = EpubDoc::new(epub_file);
1986 assert!(doc.is_ok());
1987
1988 let doc = doc.unwrap();
1989 assert!(doc.get_manifest_item("content_001").is_ok());
1990 assert!(doc.get_manifest_item("bar").is_ok());
1991
1992 assert_eq!(
1993 doc.get_manifest_item_with_fallback("content_001", &vec!["application/xhtml+xml"])
1994 .unwrap_err()
1995 .to_string(),
1996 "No supported file format: The fallback resource does not contain the file format you support."
1997 );
1998 }
1999
2000 #[test]
2004 fn test_pub_foreign_image() {
2005 let epub_file = Path::new("./test_case/pub-foreign_image.epub");
2006 let doc = EpubDoc::new(epub_file);
2007 assert!(doc.is_ok());
2008
2009 let doc = doc.unwrap();
2010 let result = doc.get_manifest_item_with_fallback(
2011 "image-tiff",
2012 &vec!["image/png", "application/xhtml+xml"],
2013 );
2014 assert!(result.is_ok());
2015
2016 let (_, mime) = result.unwrap();
2017 assert_eq!(mime, "image/png");
2018 }
2019
2020 #[test]
2024 fn test_pub_foreign_json_spine() {
2025 let epub_file = Path::new("./test_case/pub-foreign_json-spine.epub");
2026 let doc = EpubDoc::new(epub_file);
2027 assert!(doc.is_ok());
2028
2029 let doc = doc.unwrap();
2030 let result = doc.get_manifest_item_with_fallback(
2031 "content_primary",
2032 &vec!["application/xhtml+xml", "application/json"],
2033 );
2034 assert!(result.is_ok());
2035 let (_, mime) = result.unwrap();
2036 assert_eq!(mime, "application/json");
2037
2038 let result = doc
2039 .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
2040 assert!(result.is_ok());
2041 let (_, mime) = result.unwrap();
2042 assert_eq!(mime, "application/xhtml+xml");
2043 }
2044
2045 #[test]
2049 fn test_pub_foreign_xml_spine() {
2050 let epub_file = Path::new("./test_case/pub-foreign_xml-spine.epub");
2051 let doc = EpubDoc::new(epub_file);
2052 assert!(doc.is_ok());
2053
2054 let doc = doc.unwrap();
2055 let result = doc.get_manifest_item_with_fallback(
2056 "content_primary",
2057 &vec!["application/xhtml+xml", "application/xml"],
2058 );
2059 assert!(result.is_ok());
2060 let (_, mime) = result.unwrap();
2061 assert_eq!(mime, "application/xml");
2062
2063 let result = doc
2064 .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
2065 assert!(result.is_ok());
2066 let (_, mime) = result.unwrap();
2067 assert_eq!(mime, "application/xhtml+xml");
2068 }
2069
2070 #[test]
2074 fn test_pub_foreign_xml_suffix_spine() {
2075 let epub_file = Path::new("./test_case/pub-foreign_xml-suffix-spine.epub");
2076 let doc = EpubDoc::new(epub_file);
2077 assert!(doc.is_ok());
2078
2079 let doc = doc.unwrap();
2080 let result = doc.get_manifest_item_with_fallback(
2081 "content_primary",
2082 &vec!["application/xhtml+xml", "application/dtc+xml"],
2083 );
2084 assert!(result.is_ok());
2085 let (_, mime) = result.unwrap();
2086 assert_eq!(mime, "application/dtc+xml");
2087
2088 let result = doc
2089 .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
2090 assert!(result.is_ok());
2091 let (_, mime) = result.unwrap();
2092 assert_eq!(mime, "application/xhtml+xml");
2093 }
2094 }
2095
2096 mod open_container_format_tests {
2098 use std::{cmp::min, io::Read, path::Path};
2099
2100 use sha1::{Digest, Sha1};
2101
2102 use crate::epub::EpubDoc;
2103
2104 #[test]
2108 fn test_ocf_metainf_inc() {
2109 let epub_file = Path::new("./test_case/ocf-metainf-inc.epub");
2110 let doc = EpubDoc::new(epub_file);
2111 assert!(doc.is_ok());
2112 }
2113
2114 #[test]
2118 fn test_ocf_metainf_manifest() {
2119 let epub_file = Path::new("./test_case/ocf-metainf-manifest.epub");
2120 let doc = EpubDoc::new(epub_file);
2121 assert!(doc.is_ok());
2122 }
2123
2124 #[test]
2128 fn test_ocf_package_arbitrary() {
2129 let epub_file = Path::new("./test_case/ocf-package_arbitrary.epub");
2130 let doc = EpubDoc::new(epub_file);
2131 assert!(doc.is_ok());
2132
2133 let doc = doc.unwrap();
2134 assert_eq!(doc.package_path, Path::new("FOO/BAR/package.opf"));
2135 }
2136
2137 #[test]
2141 fn test_ocf_package_multiple() {
2142 let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
2143 let doc = EpubDoc::new(epub_file);
2144 assert!(doc.is_ok());
2145
2146 let doc = doc.unwrap();
2147 assert_eq!(doc.package_path, Path::new("FOO/BAR/package.opf"));
2148 assert_eq!(doc.base_path, Path::new("FOO/BAR"));
2149 }
2150
2151 #[test]
2155 fn test_ocf_url_link_leaking_relative() {
2156 let epub_file = Path::new("./test_case/ocf-url_link-leaking-relative.epub");
2157 let doc = EpubDoc::new(epub_file);
2158 assert!(doc.is_err());
2159 assert_eq!(
2160 doc.err().unwrap().to_string(),
2161 String::from(
2162 "Relative link leakage: Path \"../../../../media/imgs/monastery.jpg\" is out of container range."
2163 )
2164 )
2165 }
2166
2167 #[test]
2171 fn test_ocf_url_link_path_absolute() {
2172 let epub_file = Path::new("./test_case/ocf-url_link-path-absolute.epub");
2173 let doc = EpubDoc::new(epub_file);
2174 assert!(doc.is_ok());
2175
2176 let doc = doc.unwrap();
2177 let resource = doc.manifest.get("photo").unwrap();
2178 assert_eq!(resource.path, Path::new("media/imgs/monastery.jpg"));
2179 }
2180
2181 #[test]
2185 fn test_ocf_url_link_relative() {
2186 let epub_file = Path::new("./test_case/ocf-url_link-relative.epub");
2187 let doc = EpubDoc::new(epub_file);
2188 assert!(doc.is_ok());
2189
2190 let doc = doc.unwrap();
2191 let resource = doc.manifest.get("photo").unwrap();
2192 assert_eq!(resource.path, Path::new("media/imgs/monastery.jpg"));
2193 }
2194
2195 #[test]
2199 fn test_ocf_url_manifest() {
2200 let epub_file = Path::new("./test_case/ocf-url_manifest.epub");
2201 let doc = EpubDoc::new(epub_file);
2202 assert!(doc.is_ok());
2203
2204 let doc = doc.unwrap();
2205 assert!(doc.get_manifest_item("nav").is_ok());
2206 assert!(doc.get_manifest_item("content_001").is_ok());
2207 assert!(doc.get_manifest_item("content_002").is_err());
2208 }
2209
2210 #[test]
2214 fn test_ocf_url_relative() {
2215 let epub_file = Path::new("./test_case/ocf-url_relative.epub");
2216 let doc = EpubDoc::new(epub_file);
2217 assert!(doc.is_ok());
2218
2219 let doc = doc.unwrap();
2220 assert_eq!(doc.package_path, Path::new("foo/BAR/baz.opf"));
2221 assert_eq!(doc.base_path, Path::new("foo/BAR"));
2222 assert_eq!(
2223 doc.manifest.get("nav").unwrap().path,
2224 Path::new("foo/BAR/nav.xhtml")
2225 );
2226 assert_eq!(
2227 doc.manifest.get("content_001").unwrap().path,
2228 Path::new("foo/BAR/qux/content_001.xhtml")
2229 );
2230 assert!(doc.get_manifest_item("nav").is_ok());
2231 assert!(doc.get_manifest_item("content_001").is_ok());
2232 }
2233
2234 #[test]
2239 fn test_ocf_zip_comp() {
2240 let epub_file = Path::new("./test_case/ocf-zip-comp.epub");
2241 let doc = EpubDoc::new(epub_file);
2242 assert!(doc.is_ok());
2243 }
2244
2245 #[test]
2250 fn test_ocf_zip_mult() {
2251 let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2252 let doc = EpubDoc::new(epub_file);
2253 assert!(doc.is_ok());
2254 }
2255
2256 #[test]
2260 fn test_ocf_font_obfuscation() {
2261 let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2262 let doc = EpubDoc::new(epub_file);
2263 assert!(doc.is_ok());
2264
2265 let doc = doc.unwrap();
2266 let unique_id = doc.unique_identifier.clone();
2267
2268 let mut hasher = Sha1::new();
2269 hasher.update(unique_id.as_bytes());
2270 let hash = hasher.finalize();
2271 let mut key = vec![0u8; 1040];
2272 for i in 0..1040 {
2273 key[i] = hash[i % hash.len()];
2274 }
2275
2276 assert!(doc.encryption.is_some());
2277 assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2278
2279 let data = &doc.encryption.unwrap()[0];
2280 assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2281
2282 let font_file = doc
2283 .archive
2284 .lock()
2285 .unwrap()
2286 .by_name(&data.data)
2287 .unwrap()
2288 .bytes()
2289 .collect::<Result<Vec<u8>, _>>();
2290 assert!(font_file.is_ok());
2291 let font_file = font_file.unwrap();
2292
2293 let mut deobfuscated = font_file.clone();
2295 for i in 0..min(1040, deobfuscated.len()) {
2296 deobfuscated[i] ^= key[i];
2297 }
2298
2299 assert!(is_valid_font(&deobfuscated));
2300 }
2301
2302 #[test]
2306 fn test_ocf_font_obfuscation_bis() {
2307 let epub_file = Path::new("./test_case/ocf-font_obfuscation_bis.epub");
2308 let doc = EpubDoc::new(epub_file);
2309 assert!(doc.is_ok());
2310
2311 let doc = doc.unwrap();
2312
2313 let wrong_unique_id = "wrong-publication-id";
2314 let mut hasher = Sha1::new();
2315 hasher.update(wrong_unique_id.as_bytes());
2316 let hash = hasher.finalize();
2317 let mut wrong_key = vec![0u8; 1040];
2318 for i in 0..1040 {
2319 wrong_key[i] = hash[i % hash.len()];
2320 }
2321
2322 assert!(doc.encryption.is_some());
2323 assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2324
2325 let data = &doc.encryption.unwrap()[0];
2326 assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2327
2328 let font_file = doc
2329 .archive
2330 .lock()
2331 .unwrap()
2332 .by_name(&data.data)
2333 .unwrap()
2334 .bytes()
2335 .collect::<Result<Vec<u8>, _>>();
2336 assert!(font_file.is_ok());
2337 let font_file = font_file.unwrap();
2338
2339 let mut deobfuscated_with_wrong_key = font_file.clone();
2341 for i in 0..std::cmp::min(1040, deobfuscated_with_wrong_key.len()) {
2342 deobfuscated_with_wrong_key[i] ^= wrong_key[i];
2343 }
2344
2345 assert!(!is_valid_font(&deobfuscated_with_wrong_key));
2346 }
2347
2348 fn is_valid_font(data: &[u8]) -> bool {
2349 if data.len() < 4 {
2350 return false;
2351 }
2352 let sig = &data[0..4];
2353 sig == b"OTTO"
2356 || sig == b"\x00\x01\x00\x00"
2357 || sig == b"\x00\x02\x00\x00"
2358 || sig == b"true"
2359 || sig == b"typ1"
2360 }
2361 }
2362
2363 #[test]
2364 fn test_parse_container() {
2365 let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2366 let doc = EpubDoc::new(epub_file);
2367 assert!(doc.is_ok());
2368
2369 let container = r#"
2371 <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2372 <rootfiles></rootfiles>
2373 </container>
2374 "#
2375 .to_string();
2376
2377 let result = EpubDoc::<BufReader<File>>::parse_container(container);
2378 assert!(result.is_err());
2379 assert_eq!(
2380 result.unwrap_err(),
2381 EpubError::NonCanonicalFile { tag: "rootfile".to_string() }
2382 );
2383
2384 let container = r#"
2385 <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2386 <rootfiles>
2387 <rootfile media-type="application/oebps-package+xml"/>
2388 </rootfiles>
2389 </container>
2390 "#
2391 .to_string();
2392
2393 let result = EpubDoc::<BufReader<File>>::parse_container(container);
2394 assert!(result.is_err());
2395 assert_eq!(
2396 result.unwrap_err(),
2397 EpubError::MissingRequiredAttribute {
2398 tag: "rootfile".to_string(),
2399 attribute: "full-path".to_string(),
2400 }
2401 );
2402
2403 let container = r#"
2404 <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2405 <rootfiles>
2406 <rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
2407 </rootfiles>
2408 </container>
2409 "#
2410 .to_string();
2411
2412 let result = EpubDoc::<BufReader<File>>::parse_container(container);
2413 assert!(result.is_ok());
2414 assert_eq!(result.unwrap(), PathBuf::from("EPUB/content.opf"))
2415 }
2416
2417 #[test]
2418 fn test_parse_manifest() {
2419 let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
2420 let doc = EpubDoc::new(epub_file);
2421 assert!(doc.is_ok());
2422
2423 let manifest = r#"
2424 <manifest>
2425 <item href="content_001.xhtml" media-type="application/xhtml+xml"/>
2426 <item properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2427 </manifest>
2428 "#;
2429 let mut doc = doc.unwrap();
2430 let element = XmlReader::parse(manifest);
2431 assert!(element.is_ok());
2432
2433 let element = element.unwrap();
2434 let result = doc.parse_manifest(&element);
2435 assert!(result.is_err());
2436 assert_eq!(
2437 result.unwrap_err(),
2438 EpubError::MissingRequiredAttribute {
2439 tag: "item".to_string(),
2440 attribute: "id".to_string(),
2441 },
2442 );
2443
2444 let manifest = r#"
2445 <manifest>
2446 <item id="content_001" media-type="application/xhtml+xml"/>
2447 <item id="nav" properties="nav" media-type="application/xhtml+xml"/>
2448 </manifest>
2449 "#;
2450 let element = XmlReader::parse(manifest);
2451 assert!(element.is_ok());
2452
2453 let element = element.unwrap();
2454 let result = doc.parse_manifest(&element);
2455 assert!(result.is_err());
2456 assert_eq!(
2457 result.unwrap_err(),
2458 EpubError::MissingRequiredAttribute {
2459 tag: "item".to_string(),
2460 attribute: "href".to_string(),
2461 },
2462 );
2463
2464 let manifest = r#"
2465 <manifest>
2466 <item id="content_001" href="content_001.xhtml"/>
2467 <item id="nav" properties="nav" href="nav.xhtml"/>
2468 </manifest>
2469 "#;
2470 let element = XmlReader::parse(manifest);
2471 assert!(element.is_ok());
2472
2473 let element = element.unwrap();
2474 let result = doc.parse_manifest(&element);
2475 assert!(result.is_err());
2476 assert_eq!(
2477 result.unwrap_err(),
2478 EpubError::MissingRequiredAttribute {
2479 tag: "item".to_string(),
2480 attribute: "media-type".to_string(),
2481 },
2482 );
2483
2484 let manifest = r#"
2485 <manifest>
2486 <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
2487 <item id="nav" properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2488 </manifest>
2489 "#;
2490 let element = XmlReader::parse(manifest);
2491 assert!(element.is_ok());
2492
2493 let element = element.unwrap();
2494 let result = doc.parse_manifest(&element);
2495 assert!(result.is_ok());
2496 }
2497
2498 #[test]
2500 fn test_fn_has_encryption() {
2501 let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2502 let doc = EpubDoc::new(epub_file);
2503 assert!(doc.is_ok());
2504
2505 let doc = doc.unwrap();
2506 assert!(doc.has_encryption());
2507 }
2508
2509 #[test]
2511 fn test_fn_parse_encryption() {
2512 let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2513 let doc = EpubDoc::new(epub_file);
2514 assert!(doc.is_ok());
2515
2516 let doc = doc.unwrap();
2517 assert!(doc.encryption.is_some());
2518
2519 let encryption = doc.encryption.unwrap();
2520 assert_eq!(encryption.len(), 1);
2521 assert_eq!(encryption[0].method, "http://www.idpf.org/2008/embedding");
2522 assert_eq!(encryption[0].data, "EPUB/fonts/Lobster.ttf");
2523 }
2524
2525 #[test]
2526 fn test_get_metadata_existing_key() {
2527 let epub_file = Path::new("./test_case/epub-33.epub");
2528 let doc = EpubDoc::new(epub_file);
2529 assert!(doc.is_ok());
2530
2531 let doc = doc.unwrap();
2532
2533 let titles = doc.get_metadata("title");
2534 assert!(titles.is_some());
2535
2536 let titles = titles.unwrap();
2537 assert_eq!(titles.len(), 1);
2538 assert_eq!(titles[0].property, "title");
2539 assert_eq!(titles[0].value, "EPUB 3.3");
2540
2541 let languages = doc.get_metadata("language");
2542 assert!(languages.is_some());
2543
2544 let languages = languages.unwrap();
2545 assert_eq!(languages.len(), 1);
2546 assert_eq!(languages[0].property, "language");
2547 assert_eq!(languages[0].value, "en-us");
2548
2549 let language = doc.get_language();
2550 assert_eq!(language, vec!["en-us"]);
2551 }
2552
2553 #[test]
2554 fn test_get_metadata_nonexistent_key() {
2555 let epub_file = Path::new("./test_case/epub-33.epub");
2556 let doc = EpubDoc::new(epub_file);
2557 assert!(doc.is_ok());
2558
2559 let doc = doc.unwrap();
2560 let metadata = doc.get_metadata("nonexistent");
2561 assert!(metadata.is_none());
2562 }
2563
2564 #[test]
2565 fn test_get_metadata_multiple_items_same_type() {
2566 let epub_file = Path::new("./test_case/epub-33.epub");
2567 let doc = EpubDoc::new(epub_file);
2568 assert!(doc.is_ok());
2569
2570 let doc = doc.unwrap();
2571
2572 let creators = doc.get_metadata("creator");
2573 assert!(creators.is_some());
2574
2575 let creators = creators.unwrap();
2576 assert_eq!(creators.len(), 3);
2577
2578 assert_eq!(creators[0].id, Some("creator_id_0".to_string()));
2579 assert_eq!(creators[0].property, "creator");
2580 assert_eq!(creators[0].value, "Matt Garrish, DAISY Consortium");
2581
2582 assert_eq!(creators[1].id, Some("creator_id_1".to_string()));
2583 assert_eq!(creators[1].property, "creator");
2584 assert_eq!(creators[1].value, "Ivan Herman, W3C");
2585
2586 assert_eq!(creators[2].id, Some("creator_id_2".to_string()));
2587 assert_eq!(creators[2].property, "creator");
2588 assert_eq!(creators[2].value, "Dave Cramer, Invited Expert");
2589 }
2590
2591 #[test]
2592 fn test_get_metadata_with_refinement() {
2593 let epub_file = Path::new("./test_case/epub-33.epub");
2594 let doc = EpubDoc::new(epub_file);
2595 assert!(doc.is_ok());
2596
2597 let doc = doc.unwrap();
2598
2599 let title = doc.get_metadata("title");
2600 assert!(title.is_some());
2601
2602 let title = title.unwrap();
2603 assert_eq!(title.len(), 1);
2604 assert_eq!(title[0].refined.len(), 1);
2605 assert_eq!(title[0].refined[0].property, "title-type");
2606 assert_eq!(title[0].refined[0].value, "main");
2607 }
2608
2609 #[test]
2610 fn test_get_manifest_item_with_fallback() {
2611 let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
2612 let doc = EpubDoc::new(epub_file);
2613 assert!(doc.is_ok());
2614
2615 let doc = doc.unwrap();
2616 assert!(doc.get_manifest_item("content_001").is_ok());
2617 assert!(doc.get_manifest_item("bar").is_ok());
2618
2619 if let Ok((_, mime)) =
2621 doc.get_manifest_item_with_fallback("content_001", &vec!["image/psd"])
2622 {
2623 assert_eq!(mime, "image/psd");
2624 } else {
2625 assert!(false, "get_manifest_item_with_fallback failed");
2626 }
2627
2628 assert_eq!(
2630 doc.get_manifest_item_with_fallback("content_001", &vec!["application/xhtml+xml"])
2631 .unwrap_err()
2632 .to_string(),
2633 "No supported file format: The fallback resource does not contain the file format you support."
2634 );
2635 }
2636
2637 #[test]
2638 fn test_get_cover() {
2639 let epub_file = Path::new("./test_case/pkg-cover-image.epub");
2640 let doc = EpubDoc::new(epub_file);
2641 if let Err(err) = &doc {
2642 println!("{}", err);
2643 }
2644 assert!(doc.is_ok());
2645
2646 let doc = doc.unwrap();
2647 let result = doc.get_cover();
2648 assert!(result.is_some());
2649
2650 let (data, mime) = result.unwrap();
2651 assert_eq!(data.len(), 5785);
2652 assert_eq!(mime, "image/jpeg");
2653 }
2654
2655 #[test]
2656 fn test_epub_2() {
2657 let epub_file = Path::new("./test_case/epub-2.epub");
2658 let doc = EpubDoc::new(epub_file);
2659 assert!(doc.is_ok());
2660
2661 let doc = doc.unwrap();
2662
2663 let titles = doc.get_title();
2664 assert_eq!(titles, vec!["Minimal EPUB 2.0"]);
2665 }
2666
2667 #[test]
2668 fn test_is_valid_epub_valid_file() {
2669 let result = EpubDoc::is_valid_epub("./test_case/epub-2.epub");
2670 assert!(result.is_ok());
2671 assert_eq!(result.unwrap(), true);
2672 }
2673
2674 #[test]
2675 fn test_is_valid_epub_invalid_path() {
2676 let result = EpubDoc::is_valid_epub("./test_case/nonexistent.epub");
2677 assert!(result.is_err());
2678 }
2679
2680 #[test]
2681 fn test_is_valid_epub_corrupted_zip() {
2682 let temp_dir = std::env::temp_dir();
2683 let corrupted_file = temp_dir.join("corrupted.epub");
2684
2685 std::fs::write(&corrupted_file, b"not a valid zip file").unwrap();
2686
2687 let result = EpubDoc::is_valid_epub(&corrupted_file);
2688
2689 assert!(result.is_err());
2690 let err = result.unwrap_err();
2691 assert!(matches!(err, EpubError::ArchiveError { .. }));
2692
2693 std::fs::remove_file(corrupted_file).ok();
2694 }
2695
2696 #[test]
2697 fn test_is_valid_epub_valid_epub_3() {
2698 let result = EpubDoc::is_valid_epub("./test_case/epub-33.epub");
2699 assert!(result.is_ok());
2700 assert_eq!(result.unwrap(), true);
2701 }
2702
2703 #[test]
2704 fn test_is_outside_error() {
2705 let archive_error = EpubError::ArchiveError {
2706 source: zip::result::ZipError::Io(std::io::Error::new(
2707 std::io::ErrorKind::Other,
2708 "test",
2709 )),
2710 };
2711 assert!(EpubDoc::<BufReader<File>>::is_outside_error(&archive_error));
2712
2713 let io_error = EpubError::IOError {
2714 source: std::io::Error::new(std::io::ErrorKind::NotFound, "test"),
2715 };
2716 assert!(EpubDoc::<BufReader<File>>::is_outside_error(&io_error));
2717
2718 let non_canonical = EpubError::NonCanonicalEpub { expected_file: "test".to_string() };
2719 assert!(!EpubDoc::<BufReader<File>>::is_outside_error(
2720 &non_canonical
2721 ));
2722
2723 let missing_attr = EpubError::MissingRequiredAttribute {
2724 tag: "test".to_string(),
2725 attribute: "id".to_string(),
2726 };
2727 assert!(!EpubDoc::<BufReader<File>>::is_outside_error(&missing_attr));
2728 }
2729
2730 mod metadata_sheet_tests {
2731 use crate::epub::EpubDoc;
2732 use std::path::Path;
2733
2734 #[test]
2735 fn test_get_metadata_sheet_basic_fields() {
2736 let epub_file = Path::new("./test_case/epub-33.epub");
2737 let doc = EpubDoc::new(epub_file);
2738 assert!(doc.is_ok());
2739
2740 let doc = doc.unwrap();
2741 let sheet = doc.get_metadata_sheet();
2742
2743 assert_eq!(sheet.title.len(), 1);
2744 assert_eq!(sheet.title[0], "EPUB 3.3");
2745
2746 assert_eq!(sheet.language.len(), 1);
2747 assert_eq!(sheet.language[0], "en-us");
2748
2749 assert_eq!(sheet.publisher, "World Wide Web Consortium");
2750
2751 assert_eq!(
2752 sheet.rights,
2753 "https://www.w3.org/Consortium/Legal/2015/doc-license"
2754 );
2755 }
2756
2757 #[test]
2758 fn test_get_metadata_sheet_multiple_creators() {
2759 let epub_file = Path::new("./test_case/epub-33.epub");
2760 let doc = EpubDoc::new(epub_file);
2761 assert!(doc.is_ok());
2762
2763 let doc = doc.unwrap();
2764 let sheet = doc.get_metadata_sheet();
2765
2766 assert_eq!(sheet.creator.len(), 3);
2767 assert_eq!(sheet.creator[0], "Matt Garrish, DAISY Consortium");
2768 assert_eq!(sheet.creator[1], "Ivan Herman, W3C");
2769 assert_eq!(sheet.creator[2], "Dave Cramer, Invited Expert");
2770 }
2771
2772 #[test]
2773 fn test_get_metadata_sheet_multiple_subjects() {
2774 let epub_file = Path::new("./test_case/epub-33.epub");
2775 let doc = EpubDoc::new(epub_file);
2776 assert!(doc.is_ok());
2777
2778 let doc = doc.unwrap();
2779 let sheet = doc.get_metadata_sheet();
2780
2781 assert_eq!(sheet.subject.len(), 2);
2782 assert_eq!(sheet.subject[0], "Information systems~World Wide Web");
2783 assert_eq!(
2784 sheet.subject[1],
2785 "General and reference~Computing standards, RFCs and guidelines"
2786 );
2787 }
2788
2789 #[test]
2790 fn test_get_metadata_sheet_identifier_with_id() {
2791 let epub_file = Path::new("./test_case/epub-33.epub");
2792 let doc = EpubDoc::new(epub_file);
2793 assert!(doc.is_ok());
2794
2795 let doc = doc.unwrap();
2796 let sheet = doc.get_metadata_sheet();
2797
2798 assert!(sheet.identifier.contains_key("pub-id"));
2799 assert_eq!(
2800 sheet.identifier.get("pub-id"),
2801 Some(&"https://www.w3.org/TR/epub-33/".to_string())
2802 );
2803 }
2804
2805 #[test]
2806 fn test_get_metadata_sheet_missing_scalar_fields() {
2807 let epub_file = Path::new("./test_case/epub-33.epub");
2808 let doc = EpubDoc::new(epub_file);
2809 assert!(doc.is_ok());
2810
2811 let doc = doc.unwrap();
2812 let sheet = doc.get_metadata_sheet();
2813
2814 assert!(sheet.coverage.is_empty());
2815 assert!(sheet.description.is_empty());
2816 assert!(sheet.format.is_empty());
2817 assert!(sheet.source.is_empty());
2818 assert!(sheet.epub_type.is_empty());
2819 assert!(sheet.contributor.is_empty());
2820 assert!(sheet.relation.is_empty());
2821 }
2822
2823 #[test]
2824 fn test_get_metadata_sheet_title_refinement_via_get_metadata() {
2825 let epub_file = Path::new("./test_case/epub-33.epub");
2826 let doc = EpubDoc::new(epub_file);
2827 assert!(doc.is_ok());
2828
2829 let doc = doc.unwrap();
2830 let title_metadata = doc.get_metadata("title");
2831 assert!(title_metadata.is_some());
2832
2833 let title_metadata = title_metadata.unwrap();
2834 assert_eq!(title_metadata.len(), 1);
2835 assert_eq!(title_metadata[0].refined.len(), 1);
2836 assert_eq!(title_metadata[0].refined[0].property, "title-type");
2837 assert_eq!(title_metadata[0].refined[0].value, "main");
2838
2839 let sheet = doc.get_metadata_sheet();
2840 assert_eq!(sheet.title.len(), 1);
2841 assert_eq!(sheet.title[0], "EPUB 3.3");
2842 }
2843
2844 #[test]
2845 fn test_get_metadata_sheet_ignores_unknown_properties() {
2846 let epub_file = Path::new("./test_case/epub-33.epub");
2847 let doc = EpubDoc::new(epub_file);
2848 assert!(doc.is_ok());
2849
2850 let doc = doc.unwrap();
2851 let sheet = doc.get_metadata_sheet();
2852
2853 assert_eq!(sheet.title.len(), 1);
2854 assert_eq!(sheet.creator.len(), 3);
2855 assert_eq!(sheet.subject.len(), 2);
2856 }
2857
2858 #[test]
2859 fn test_get_metadata_sheet_idempotent() {
2860 let epub_file = Path::new("./test_case/epub-33.epub");
2861 let doc = EpubDoc::new(epub_file);
2862 assert!(doc.is_ok());
2863
2864 let doc = doc.unwrap();
2865 let sheet1 = doc.get_metadata_sheet();
2866 let sheet2 = doc.get_metadata_sheet();
2867
2868 assert_eq!(sheet1.title, sheet2.title);
2869 assert_eq!(sheet1.creator, sheet2.creator);
2870 assert_eq!(sheet1.language, sheet2.language);
2871 assert_eq!(sheet1.identifier, sheet2.identifier);
2872 assert_eq!(sheet1.date, sheet2.date);
2873 }
2874 }
2875}