1use std::{
24 collections::HashMap,
25 fs::{self, File},
26 io::{BufReader, Read, Seek},
27 path::{Path, PathBuf},
28 sync::{
29 Arc, Mutex,
30 atomic::{AtomicUsize, Ordering},
31 },
32};
33
34#[cfg(not(feature = "no-indexmap"))]
35use indexmap::IndexMap;
36use zip::{ZipArchive, result::ZipError};
37
38use crate::{
39 error::EpubError,
40 types::{
41 EncryptionData, EpubVersion, ManifestItem, MetadataItem, MetadataLinkItem,
42 MetadataRefinement, NavPoint, SpineItem,
43 },
44 utils::{
45 DecodeBytes, NormalizeWhitespace, XmlElement, XmlReader, adobe_font_dencryption,
46 check_realtive_link_leakage, compression_method_check, get_file_in_zip_archive,
47 idpf_font_dencryption,
48 },
49};
50
51pub struct EpubDoc<R: Read + Seek> {
77 pub(crate) archive: Arc<Mutex<ZipArchive<R>>>,
79
80 pub(crate) epub_path: PathBuf,
82
83 pub package_path: PathBuf,
85
86 pub base_path: PathBuf,
88
89 pub version: EpubVersion,
91
92 pub unique_identifier: String,
96
97 pub metadata: Vec<MetadataItem>,
99
100 pub metadata_link: Vec<MetadataLinkItem>,
102
103 #[cfg(not(feature = "no-indexmap"))]
124 pub manifest: IndexMap<String, ManifestItem>,
125 #[cfg(feature = "no-indexmap")]
126 pub manifest: HashMap<String, ManifestItem>,
127
128 pub spine: Vec<SpineItem>,
133
134 pub encryption: Option<Vec<EncryptionData>>,
136
137 pub catalog: Vec<NavPoint>,
139
140 pub catalog_title: String,
142
143 current_spine_index: AtomicUsize,
145
146 has_encryption: bool,
148}
149
150impl<R: Read + Seek> EpubDoc<R> {
151 pub fn from_reader(reader: R, epub_path: PathBuf) -> Result<Self, EpubError> {
171 let mut archive = ZipArchive::new(reader).map_err(EpubError::from)?;
181 let epub_path = fs::canonicalize(epub_path)?;
182
183 compression_method_check(&mut archive)?;
184
185 let container =
186 get_file_in_zip_archive(&mut archive, "META-INF/container.xml")?.decode()?;
187 let package_path = Self::parse_container(container)?;
188 let base_path = package_path
189 .parent()
190 .expect("the parent directory of the opf file must exist")
191 .to_path_buf();
192
193 let opf_file = get_file_in_zip_archive(
194 &mut archive,
195 package_path
196 .to_str()
197 .expect("package_path should be valid UTF-8"),
198 )?
199 .decode()?;
200 let package = XmlReader::parse(&opf_file)?;
201
202 let version = Self::determine_epub_version(&package)?;
203 let has_encryption = archive
204 .by_path(Path::new("META-INF/encryption.xml"))
205 .is_ok();
206
207 let mut doc = Self {
208 archive: Arc::new(Mutex::new(archive)),
209 epub_path,
210 package_path,
211 base_path,
212 version,
213 unique_identifier: String::new(),
214 metadata: vec![],
215 metadata_link: vec![],
216
217 #[cfg(feature = "no-indexmap")]
218 manifest: HashMap::new(),
219 #[cfg(not(feature = "no-indexmap"))]
220 manifest: IndexMap::new(),
221
222 spine: vec![],
223 encryption: None,
224 catalog: vec![],
225 catalog_title: String::new(),
226 current_spine_index: AtomicUsize::new(0),
227 has_encryption,
228 };
229
230 let metadata_element = package.find_elements_by_name("metadata").next().unwrap();
231 let manifest_element = package.find_elements_by_name("manifest").next().unwrap();
232 let spine_element = package.find_elements_by_name("spine").next().unwrap();
233
234 doc.parse_metadata(metadata_element)?;
235 doc.parse_manifest(manifest_element)?;
236 doc.parse_spine(spine_element)?;
237 doc.parse_encryption()?;
238 doc.parse_catalog()?;
239
240 doc.unique_identifier = if let Some(uid) = package.get_attr("unique-identifier") {
242 doc.metadata.iter().find(|item| {
243 item.property == "identifier" && item.id.as_ref().is_some_and(|id| id == &uid)
244 })
245 } else {
246 doc.metadata
247 .iter()
248 .find(|item| item.property == "identifier")
249 }
250 .map(|item| item.value.clone())
251 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "dc:identifier".to_string() })?;
252
253 Ok(doc)
254 }
255
256 fn parse_container(data: String) -> Result<PathBuf, EpubError> {
272 let root = XmlReader::parse(&data)?;
273 let rootfile = root
274 .find_elements_by_name("rootfile")
275 .next()
276 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "rootfile".to_string() })?;
277
278 let attr =
279 rootfile
280 .get_attr("full-path")
281 .ok_or_else(|| EpubError::MissingRequiredAttribute {
282 tag: "rootfile".to_string(),
283 attribute: "full-path".to_string(),
284 })?;
285
286 Ok(PathBuf::from(attr))
287 }
288
289 fn parse_metadata(&mut self, metadata_element: &XmlElement) -> Result<(), EpubError> {
300 const DC_NAMESPACE: &str = "http://purl.org/dc/elements/1.1/";
301 const OPF_NAMESPACE: &str = "http://www.idpf.org/2007/opf";
302
303 let mut metadata = Vec::new();
304 let mut metadata_link = Vec::new();
305 let mut refinements = HashMap::<String, Vec<MetadataRefinement>>::new();
306
307 for element in metadata_element.children() {
308 match &element.namespace {
309 Some(namespace) if namespace == DC_NAMESPACE => {
310 self.parse_dc_metadata(element, &mut metadata)?
311 }
312
313 Some(namespace) if namespace == OPF_NAMESPACE => self.parse_opf_metadata(
314 element,
315 &mut metadata,
316 &mut metadata_link,
317 &mut refinements,
318 )?,
319
320 _ => {}
321 };
322 }
323
324 for item in metadata.iter_mut() {
325 if let Some(id) = &item.id {
326 if let Some(refinements) = refinements.remove(id) {
327 item.refined = refinements;
328 }
329 }
330 }
331
332 self.metadata = metadata;
333 self.metadata_link = metadata_link;
334 Ok(())
335 }
336
337 fn parse_manifest(&mut self, manifest_element: &XmlElement) -> Result<(), EpubError> {
347 let estimated_items = manifest_element.children().count();
348 #[cfg(feature = "no-indexmap")]
349 let mut resources = HashMap::with_capacity(estimated_items);
350 #[cfg(not(feature = "no-indexmap"))]
351 let mut resources = IndexMap::with_capacity(estimated_items);
352
353 for element in manifest_element.children() {
354 let id = element
355 .get_attr("id")
356 .ok_or_else(|| EpubError::MissingRequiredAttribute {
357 tag: element.tag_name(),
358 attribute: "id".to_string(),
359 })?
360 .to_string();
361 let path = element
362 .get_attr("href")
363 .ok_or_else(|| EpubError::MissingRequiredAttribute {
364 tag: element.tag_name(),
365 attribute: "href".to_string(),
366 })?
367 .to_string();
368 let mime = element
369 .get_attr("media-type")
370 .ok_or_else(|| EpubError::MissingRequiredAttribute {
371 tag: element.tag_name(),
372 attribute: "media-type".to_string(),
373 })?
374 .to_string();
375 let properties = element.get_attr("properties");
376 let fallback = element.get_attr("fallback");
377
378 resources.insert(
379 id.clone(),
380 ManifestItem {
381 id,
382 path: self.normalize_manifest_path(&path)?,
383 mime,
384 properties,
385 fallback,
386 },
387 );
388 }
389
390 self.manifest = resources;
391 self.validate_fallback_chains();
392 Ok(())
393 }
394
395 fn parse_spine(&mut self, spine_element: &XmlElement) -> Result<(), EpubError> {
405 let mut spine = Vec::new();
406 for element in spine_element.children() {
407 let idref = element
408 .get_attr("idref")
409 .ok_or_else(|| EpubError::MissingRequiredAttribute {
410 tag: element.tag_name(),
411 attribute: "idref".to_string(),
412 })?
413 .to_string();
414 let id = element.get_attr("id");
415 let linear = element
416 .get_attr("linear")
417 .map(|linear| linear == "yes")
418 .unwrap_or(true);
419 let properties = element.get_attr("properties");
420
421 spine.push(SpineItem { idref, id, linear, properties });
422 }
423
424 self.spine = spine;
425 Ok(())
426 }
427
428 fn parse_encryption(&mut self) -> Result<(), EpubError> {
438 if !self.has_encryption() {
439 return Ok(());
440 }
441
442 let mut archive = self.archive.lock()?;
443 let encryption_file =
444 get_file_in_zip_archive(&mut archive, "META-INF/encryption.xml")?.decode()?;
445
446 let root = XmlReader::parse(&encryption_file)?;
447
448 let mut encryption_data = Vec::new();
449 for data in root.children() {
450 if data.name != "EncryptedData" {
451 continue;
452 }
453
454 let method = data
455 .find_elements_by_name("EncryptionMethod")
456 .next()
457 .ok_or_else(|| EpubError::NonCanonicalFile {
458 tag: "EncryptionMethod".to_string(),
459 })?;
460 let reference = data
461 .find_elements_by_name("CipherReference")
462 .next()
463 .ok_or_else(|| EpubError::NonCanonicalFile {
464 tag: "CipherReference".to_string(),
465 })?;
466
467 encryption_data.push(EncryptionData {
468 method: method
469 .get_attr("Algorithm")
470 .ok_or_else(|| EpubError::MissingRequiredAttribute {
471 tag: "EncryptionMethod".to_string(),
472 attribute: "Algorithm".to_string(),
473 })?
474 .to_string(),
475 data: reference
476 .get_attr("URI")
477 .ok_or_else(|| EpubError::MissingRequiredAttribute {
478 tag: "CipherReference".to_string(),
479 attribute: "URI".to_string(),
480 })?
481 .to_string(),
482 });
483 }
484
485 if !encryption_data.is_empty() {
486 self.encryption = Some(encryption_data);
487 }
488
489 Ok(())
490 }
491
492 fn parse_catalog(&mut self) -> Result<(), EpubError> {
499 const HEAD_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];
500
501 let mut archive = self.archive.lock()?;
502 match self.version {
503 EpubVersion::Version2_0 => {
504 let opf_file =
505 get_file_in_zip_archive(&mut archive, self.package_path.to_str().unwrap())?
506 .decode()?;
507 let opf_element = XmlReader::parse(&opf_file)?;
508
509 let toc_id = opf_element
510 .find_children_by_name("spine")
511 .next()
512 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?
513 .get_attr("toc")
514 .ok_or_else(|| EpubError::MissingRequiredAttribute {
515 tag: "spine".to_string(),
516 attribute: "toc".to_string(),
517 })?
518 .to_owned();
519 let toc_path = self
520 .manifest
521 .get(&toc_id)
522 .ok_or(EpubError::ResourceIdNotExist { id: toc_id })?
523 .path
524 .to_str()
525 .unwrap();
526
527 let ncx_file = get_file_in_zip_archive(&mut archive, toc_path)?.decode()?;
528 let ncx = XmlReader::parse(&ncx_file)?;
529
530 match ncx.find_elements_by_name("docTitle").next() {
531 Some(element) => self.catalog_title = element.text(),
532 None => log::warn!(
533 "Expecting to get docTitle information from the ncx file, but it's missing."
534 ),
535 };
536
537 let nav_map = ncx
538 .find_elements_by_name("navMap")
539 .next()
540 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "navMap".to_string() })?;
541
542 self.catalog = self.parse_nav_points(nav_map)?;
543
544 Ok(())
545 }
546
547 EpubVersion::Version3_0 => {
548 let nav_path = self
549 .manifest
550 .values()
551 .find(|item| {
552 if let Some(property) = &item.properties {
553 return property.contains("nav");
554 }
555 false
556 })
557 .map(|item| item.path.clone())
558 .ok_or_else(|| EpubError::NonCanonicalEpub {
559 expected_file: "Navigation Document".to_string(),
560 })?;
561
562 let nav_file =
563 get_file_in_zip_archive(&mut archive, nav_path.to_str().unwrap())?.decode()?;
564
565 let nav_element = XmlReader::parse(&nav_file)?;
566 let nav = nav_element
567 .find_elements_by_name("nav")
568 .find(|&element| element.get_attr("epub:type") == Some(String::from("toc")))
569 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "nav".to_string() })?;
570 let nav_title = nav.find_children_by_names(&HEAD_TAGS).next();
571 let nav_list = nav
572 .find_children_by_name("ol")
573 .next()
574 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "ol".to_string() })?;
575
576 self.catalog = self.parse_catalog_list(nav_list)?;
577 if let Some(nav_title) = nav_title {
578 self.catalog_title = nav_title.text();
579 };
580 Ok(())
581 }
582 }
583 }
584
    /// Returns the stored `has_encryption` flag of this document.
    #[inline]
    pub fn has_encryption(&self) -> bool {
        self.has_encryption
    }
604
605 pub fn get_metadata(&self, key: &str) -> Option<Vec<MetadataItem>> {
619 let metadatas = self
620 .metadata
621 .iter()
622 .filter(|item| item.property == key)
623 .cloned()
624 .collect::<Vec<MetadataItem>>();
625
626 (!metadatas.is_empty()).then_some(metadatas)
627 }
628
629 pub fn get_metadata_value(&self, key: &str) -> Option<Vec<String>> {
641 let values = self
642 .metadata
643 .iter()
644 .filter(|item| item.property == key)
645 .map(|item| item.value.clone())
646 .collect::<Vec<String>>();
647
648 (!values.is_empty()).then_some(values)
649 }
650
651 #[inline]
664 pub fn get_title(&self) -> Vec<String> {
665 self.get_metadata_value("title")
666 .expect("missing required 'title' metadata which is required by the EPUB specification")
667 }
668
669 #[inline]
683 pub fn get_language(&self) -> Vec<String> {
684 self.get_metadata_value("language").expect(
685 "missing required 'language' metadata which is required by the EPUB specification",
686 )
687 }
688
689 #[inline]
705 pub fn get_identifier(&self) -> Vec<String> {
706 self.get_metadata_value("identifier").expect(
707 "missing required 'identifier' metadata which is required by the EPUB specification",
708 )
709 }
710
711 pub fn get_manifest_item(&self, id: &str) -> Result<(Vec<u8>, String), EpubError> {
728 let resource_item = self
729 .manifest
730 .get(id)
731 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
732
733 self.get_resource(resource_item)
734 }
735
736 pub fn get_manifest_item_by_path(&self, path: &str) -> Result<(Vec<u8>, String), EpubError> {
755 let manifest = self
756 .manifest
757 .iter()
758 .find(|(_, item)| item.path.to_str().unwrap() == path)
759 .map(|(_, manifest)| manifest)
760 .ok_or_else(|| EpubError::ResourceNotFound { resource: path.to_string() })?;
761
762 self.get_resource(manifest)
763 }
764
765 pub fn get_manifest_item_with_fallback(
781 &self,
782 id: &str,
783 supported_format: &[&str],
784 ) -> Result<(Vec<u8>, String), EpubError> {
785 let mut current_id = id;
786 let mut fallback_chain = Vec::<&str>::new();
787 'fallback: loop {
788 let manifest_item = self
789 .manifest
790 .get(current_id)
791 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
792
793 if supported_format.contains(&manifest_item.mime.as_str()) {
794 return self.get_resource(manifest_item);
795 }
796
797 let fallback_id = match &manifest_item.fallback {
798 None => break 'fallback,
800
801 Some(id) if fallback_chain.contains(&id.as_str()) => break 'fallback,
803
804 Some(id) => {
805 fallback_chain.push(id.as_str());
806
807 id.as_str()
811 }
812 };
813
814 current_id = fallback_id;
815 }
816
817 Err(EpubError::NoSupportedFileFormat)
818 }
819
820 pub fn get_cover(&self) -> Option<(Vec<u8>, String)> {
837 self.manifest
838 .values()
839 .filter(|manifest| {
840 manifest.id.to_ascii_lowercase().contains("cover")
841 || manifest
842 .properties
843 .as_ref()
844 .map(|properties| properties.to_ascii_lowercase().contains("cover"))
845 .unwrap_or(false)
846 })
847 .find_map(|manifest| {
848 self.get_resource(manifest)
849 .map_err(|err| log::warn!("{err}"))
850 .ok()
851 })
852 }
853
854 fn get_resource(&self, resource_item: &ManifestItem) -> Result<(Vec<u8>, String), EpubError> {
856 let path = resource_item
857 .path
858 .to_str()
859 .expect("manifest item path should be valid UTF-8");
860
861 let mut archive = self.archive.lock()?;
862 let mut data = match archive.by_name(path) {
863 Ok(mut file) => {
864 let mut entry = Vec::<u8>::new();
865 file.read_to_end(&mut entry)?;
866 Ok(entry)
867 }
868 Err(ZipError::FileNotFound) => {
869 Err(EpubError::ResourceNotFound { resource: path.to_string() })
870 }
871 Err(err) => Err(EpubError::from(err)),
872 }?;
873
874 if let Some(method) = self.is_encryption_file(path) {
875 data = self.auto_dencrypt(&method, &mut data)?;
876 }
877
878 Ok((data, resource_item.mime.clone()))
879 }
880
881 pub fn navigate_by_spine_index(&mut self, index: usize) -> Option<(Vec<u8>, String)> {
900 if index >= self.spine.len() {
901 return None;
902 }
903
904 let manifest_id = self.spine[index].idref.as_ref();
905 self.current_spine_index.store(index, Ordering::SeqCst);
906 self.get_manifest_item(manifest_id)
907 .map_err(|err| log::warn!("{err}"))
908 .ok()
909 }
910
911 pub fn spine_prev(&self) -> Option<(Vec<u8>, String)> {
923 let current_index = self.current_spine_index.load(Ordering::SeqCst);
924 if current_index == 0 || !self.spine[current_index].linear {
925 return None;
926 }
927
928 let prev_index = (0..current_index)
929 .rev()
930 .find(|&index| self.spine[index].linear)?;
931
932 self.current_spine_index.store(prev_index, Ordering::SeqCst);
933 let manifest_id = self.spine[prev_index].idref.as_ref();
934 self.get_manifest_item(manifest_id)
935 .map_err(|err| log::warn!("{err}"))
936 .ok()
937 }
938
939 pub fn spine_next(&mut self) -> Option<(Vec<u8>, String)> {
951 let current_index = self.current_spine_index.load(Ordering::SeqCst);
952 if current_index >= self.spine.len() - 1 || !self.spine[current_index].linear {
953 return None;
954 }
955
956 let next_index =
957 (current_index + 1..self.spine.len()).find(|&index| self.spine[index].linear)?;
958
959 self.current_spine_index.store(next_index, Ordering::SeqCst);
960 let manifest_id = self.spine[next_index].idref.as_ref();
961 self.get_manifest_item(manifest_id)
962 .map_err(|err| log::warn!("{err}"))
963 .ok()
964 }
965
966 pub fn spine_current(&self) -> Option<(Vec<u8>, String)> {
976 let manifest_id = self.spine[self.current_spine_index.load(Ordering::SeqCst)]
977 .idref
978 .as_ref();
979 self.get_manifest_item(manifest_id)
980 .map_err(|err| log::warn!("{err}"))
981 .ok()
982 }
983
984 fn determine_epub_version(opf_element: &XmlElement) -> Result<EpubVersion, EpubError> {
994 if let Some(version) = opf_element.get_attr("version") {
996 match version.as_str() {
997 "2.0" => return Ok(EpubVersion::Version2_0),
998 "3.0" => return Ok(EpubVersion::Version3_0),
999 _ => {}
1000 }
1001 }
1002
1003 let spine_element = opf_element
1004 .find_elements_by_name("spine")
1005 .next()
1006 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?;
1007
1008 if spine_element.get_attr("toc").is_some() {
1010 return Ok(EpubVersion::Version2_0);
1011 }
1012
1013 let manifest_element = opf_element
1014 .find_elements_by_name("manifest")
1015 .next()
1016 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "manifest".to_string() })?;
1017
1018 manifest_element
1020 .children()
1021 .find_map(|element| {
1022 if let Some(id) = element.get_attr("id") {
1023 if id.eq("nav") {
1024 return Some(EpubVersion::Version3_0);
1025 }
1026 }
1027
1028 None
1029 })
1030 .ok_or(EpubError::UnrecognizedEpubVersion)
1031 }
1032
1033 #[inline]
1043 fn parse_dc_metadata(
1044 &self,
1045 element: &XmlElement,
1046 metadata: &mut Vec<MetadataItem>,
1047 ) -> Result<(), EpubError> {
1049 let id = element.get_attr("id");
1050 let lang = element.get_attr("lang");
1051 let property = element.name.clone();
1052 let value = element.text().normalize_whitespace();
1053
1054 let refined = match self.version {
1055 EpubVersion::Version2_0 => element
1058 .attributes
1059 .iter()
1060 .map(|(name, value)| {
1061 let property = name.to_string();
1062 let value = value.to_string().normalize_whitespace();
1063
1064 MetadataRefinement {
1065 refines: id.clone().unwrap(),
1066 property,
1067 value,
1068 lang: None,
1069 scheme: None,
1070 }
1071 })
1072 .collect(),
1073 EpubVersion::Version3_0 => vec![],
1074 };
1075
1076 metadata.push(MetadataItem { id, property, value, lang, refined });
1077
1078 Ok(())
1079 }
1080
1081 #[inline]
1092 fn parse_opf_metadata(
1093 &self,
1094 element: &XmlElement,
1095 metadata: &mut Vec<MetadataItem>,
1096 metadata_link: &mut Vec<MetadataLinkItem>,
1097 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1098 ) -> Result<(), EpubError> {
1099 match element.name.as_str() {
1100 "meta" => self.parse_meta_element(element, metadata, refinements),
1101 "link" => self.parse_link_element(element, metadata_link),
1102 _ => Ok(()),
1103 }
1104 }
1105
1106 #[inline]
1107 fn parse_meta_element(
1108 &self,
1109 element: &XmlElement,
1110 metadata: &mut Vec<MetadataItem>,
1111 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1112 ) -> Result<(), EpubError> {
1113 match self.version {
1114 EpubVersion::Version2_0 => {
1115 let property = element
1116 .get_attr("name")
1117 .ok_or_else(|| EpubError::NonCanonicalFile { tag: element.tag_name() })?;
1118 let value = element
1119 .get_attr("content")
1120 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1121 tag: element.tag_name(),
1122 attribute: "content".to_string(),
1123 })?
1124 .normalize_whitespace();
1125
1126 metadata.push(MetadataItem {
1127 id: None,
1128 property,
1129 value,
1130 lang: None,
1131 refined: vec![],
1132 });
1133 }
1134
1135 EpubVersion::Version3_0 => {
1136 let property = element.get_attr("property").ok_or_else(|| {
1137 EpubError::MissingRequiredAttribute {
1138 tag: element.tag_name(),
1139 attribute: "property".to_string(),
1140 }
1141 })?;
1142 let value = element.text().normalize_whitespace();
1143 let lang = element.get_attr("lang");
1144
1145 if let Some(refines) = element.get_attr("refines") {
1146 let id = refines.strip_prefix("#").unwrap_or(&refines).to_string();
1147 let scheme = element.get_attr("scheme");
1148 let refinement = MetadataRefinement {
1149 refines: id.clone(),
1150 property,
1151 value,
1152 lang,
1153 scheme,
1154 };
1155
1156 if let Some(refinements) = refinements.get_mut(&id) {
1157 refinements.push(refinement);
1158 } else {
1159 refinements.insert(id, vec![refinement]);
1160 }
1161 } else {
1162 let id = element.get_attr("id");
1163 let item = MetadataItem {
1164 id,
1165 property,
1166 value,
1167 lang,
1168 refined: vec![],
1169 };
1170
1171 metadata.push(item);
1172 };
1173 }
1174 }
1175 Ok(())
1176 }
1177
1178 #[inline]
1179 fn parse_link_element(
1180 &self,
1181 element: &XmlElement,
1182 metadata_link: &mut Vec<MetadataLinkItem>,
1183 ) -> Result<(), EpubError> {
1184 let href = element
1185 .get_attr("href")
1186 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1187 tag: element.tag_name(),
1188 attribute: "href".to_string(),
1189 })?;
1190 let rel = element
1191 .get_attr("rel")
1192 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1193 tag: element.tag_name(),
1194 attribute: "rel".to_string(),
1195 })?;
1196 let hreflang = element.get_attr("hreflang");
1197 let id = element.get_attr("id");
1198 let mime = element.get_attr("media-type");
1199 let properties = element.get_attr("properties");
1200
1201 metadata_link.push(MetadataLinkItem {
1202 href,
1203 rel,
1204 hreflang,
1205 id,
1206 mime,
1207 properties,
1208 refines: None,
1209 });
1210 Ok(())
1211 }
1212
1213 fn parse_nav_points(&self, parent_element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1219 let mut nav_points = Vec::new();
1220 for nav_point in parent_element.find_children_by_name("navPoint") {
1221 let label = match nav_point.find_children_by_name("navLabel").next() {
1222 Some(element) => element.text(),
1223 None => String::new(),
1224 };
1225
1226 let content = nav_point
1227 .find_children_by_name("content")
1228 .next()
1229 .map(|element| PathBuf::from(element.text()));
1230
1231 let play_order = nav_point
1232 .get_attr("playOrder")
1233 .and_then(|order| order.parse::<usize>().ok());
1234
1235 let children = self.parse_nav_points(nav_point)?;
1236
1237 nav_points.push(NavPoint { label, content, play_order, children });
1238 }
1239
1240 nav_points.sort();
1241 Ok(nav_points)
1242 }
1243
1244 fn parse_catalog_list(&self, element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1250 let mut catalog = Vec::new();
1251 for item in element.children() {
1252 if item.tag_name() != "li" {
1253 return Err(EpubError::NonCanonicalFile { tag: "li".to_string() });
1254 }
1255
1256 let title_element = item
1257 .find_children_by_names(&["span", "a"])
1258 .next()
1259 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "span/a".to_string() })?;
1260 let content_href = title_element.get_attr("href").map(PathBuf::from);
1261 let sub_list = if let Some(list) = item.find_children_by_name("ol").next() {
1262 self.parse_catalog_list(list)?
1263 } else {
1264 vec![]
1265 };
1266
1267 catalog.push(NavPoint {
1268 label: title_element.text(),
1269 content: content_href,
1270 children: sub_list,
1271 play_order: None,
1272 });
1273 }
1274
1275 Ok(catalog)
1276 }
1277
1278 #[inline]
1295 fn normalize_manifest_path(&self, path: &str) -> Result<PathBuf, EpubError> {
1296 let mut path = if path.starts_with("../") {
1297 let mut current_dir = self.epub_path.join(&self.package_path);
1298 current_dir.pop();
1299
1300 check_realtive_link_leakage(self.epub_path.clone(), current_dir, path)
1301 .map(PathBuf::from)
1302 .ok_or_else(|| EpubError::RelativeLinkLeakage { path: path.to_string() })?
1303 } else if let Some(path) = path.strip_prefix("/") {
1304 PathBuf::from(path.to_string())
1305 } else {
1306 self.base_path.join(path)
1307 };
1308
1309 #[cfg(windows)]
1310 {
1311 path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
1312 }
1313
1314 Ok(path)
1315 }
1316
1317 fn validate_fallback_chains(&self) {
1329 for (id, item) in &self.manifest {
1330 if item.fallback.is_none() {
1331 continue;
1332 }
1333
1334 let mut fallback_chain = Vec::new();
1335 if let Err(msg) = self.validate_fallback_chain(id, &mut fallback_chain) {
1336 log::warn!("Invalid fallback chain for item {}: {}", id, msg);
1337 }
1338 }
1339 }
1340
1341 fn validate_fallback_chain(
1355 &self,
1356 manifest_id: &str,
1357 fallback_chain: &mut Vec<String>,
1358 ) -> Result<(), String> {
1359 if fallback_chain.contains(&manifest_id.to_string()) {
1360 fallback_chain.push(manifest_id.to_string());
1361
1362 return Err(format!(
1363 "Circular reference detected in fallback chain for {}",
1364 fallback_chain.join("->")
1365 ));
1366 }
1367
1368 let item = self.manifest.get(manifest_id).unwrap();
1370
1371 if let Some(fallback_id) = &item.fallback {
1372 if !self.manifest.contains_key(fallback_id) {
1373 return Err(format!(
1374 "Fallback resource {} does not exist in manifest",
1375 fallback_id
1376 ));
1377 }
1378
1379 fallback_chain.push(manifest_id.to_string());
1380 self.validate_fallback_chain(fallback_id, fallback_chain)
1381 } else {
1382 Ok(())
1384 }
1385 }
1386
1387 fn is_encryption_file(&self, path: &str) -> Option<String> {
1400 self.encryption.as_ref().and_then(|encryptions| {
1401 encryptions
1402 .iter()
1403 .find(|encryption| encryption.data == path)
1404 .map(|encryption| encryption.method.clone())
1405 })
1406 }
1407
1408 #[inline]
1426 fn auto_dencrypt(&self, method: &str, data: &mut [u8]) -> Result<Vec<u8>, EpubError> {
1427 match method {
1428 "http://www.idpf.org/2008/embedding" => {
1429 Ok(idpf_font_dencryption(data, &self.unique_identifier))
1430 }
1431 "http://ns.adobe.com/pdf/enc#RC" => {
1432 Ok(adobe_font_dencryption(data, &self.unique_identifier))
1433 }
1434 _ => Err(EpubError::UnsupportedEncryptedMethod { method: method.to_string() }),
1435 }
1436 }
1437}
1438
1439impl EpubDoc<BufReader<File>> {
1440 pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
1452 let file = File::open(&path).map_err(EpubError::from)?;
1453 let path = fs::canonicalize(path)?;
1454
1455 Self::from_reader(BufReader::new(file), path)
1456 }
1457
1458 pub fn is_valid_epub<P: AsRef<Path>>(path: P) -> Result<bool, EpubError> {
1473 let result = EpubDoc::new(path);
1474
1475 match result {
1476 Ok(_) => Ok(true),
1477 Err(err) if Self::is_outside_error(&err) => Err(err),
1478 Err(_) => Ok(false),
1479 }
1480 }
1481
1482 fn is_outside_error(err: &EpubError) -> bool {
1501 matches!(
1502 err,
1503 EpubError::ArchiveError { .. }
1504 | EpubError::IOError { .. }
1505 | EpubError::MutexError
1506 | EpubError::Utf8DecodeError { .. }
1507 | EpubError::Utf16DecodeError { .. }
1508 | EpubError::QuickXmlError { .. }
1509 )
1510 }
1511}
1512
1513#[cfg(test)]
1514mod tests {
1515 use std::{
1516 fs::File,
1517 io::BufReader,
1518 path::{Path, PathBuf},
1519 };
1520
1521 use crate::{epub::EpubDoc, error::EpubError, utils::XmlReader};
1522
1523 mod package_documents_tests {
1525 use std::{path::Path, sync::atomic::Ordering};
1526
1527 use crate::epub::{EpubDoc, EpubVersion};
1528
1529 #[test]
1533 fn test_pkg_collections_unknown() {
1534 let epub_file = Path::new("./test_case/pkg-collections-unknown.epub");
1535 let doc = EpubDoc::new(epub_file);
1536 assert!(doc.is_ok());
1537 }
1538
1539 #[test]
1543 fn test_pkg_creator_order() {
1544 let epub_file = Path::new("./test_case/pkg-creator-order.epub");
1545 let doc = EpubDoc::new(epub_file);
1546 assert!(doc.is_ok());
1547
1548 let doc = doc.unwrap();
1549 let creators = doc.get_metadata_value("creator");
1550 assert!(creators.is_some());
1551
1552 let creators = creators.unwrap();
1553 assert_eq!(creators.len(), 5);
1554 assert_eq!(
1555 creators,
1556 vec![
1557 "Dave Cramer",
1558 "Wendy Reid",
1559 "Dan Lazin",
1560 "Ivan Herman",
1561 "Brady Duga",
1562 ]
1563 );
1564 }
1565
1566 #[test]
1570 fn test_pkg_manifest_order() {
1571 let epub_file = Path::new("./test_case/pkg-manifest-unknown.epub");
1572 let doc = EpubDoc::new(epub_file);
1573 assert!(doc.is_ok());
1574
1575 let doc = doc.unwrap();
1576 assert_eq!(doc.manifest.len(), 2);
1577 assert!(doc.get_manifest_item("nav").is_ok());
1578 assert!(doc.get_manifest_item("content_001").is_ok());
1579 assert!(doc.get_manifest_item("content_002").is_err());
1580 }
1581
1582 #[test]
1586 fn test_pkg_meta_unknown() {
1587 let epub_file = Path::new("./test_case/pkg-meta-unknown.epub");
1588 let doc = EpubDoc::new(epub_file);
1589 assert!(doc.is_ok());
1590
1591 let doc = doc.unwrap();
1592 let value = doc.get_metadata_value("dcterms:isReferencedBy");
1593 assert!(value.is_some());
1594 let value = value.unwrap();
1595 assert_eq!(value.len(), 1);
1596 assert_eq!(
1597 value,
1598 vec!["https://www.w3.org/TR/epub-rs/#confreq-rs-pkg-meta-unknown"]
1599 );
1600
1601 let value = doc.get_metadata_value("dcterms:modified");
1602 assert!(value.is_some());
1603 let value = value.unwrap();
1604 assert_eq!(value.len(), 1);
1605 assert_eq!(value, vec!["2021-01-11T00:00:00Z"]);
1606
1607 let value = doc.get_metadata_value("dcterms:title");
1608 assert!(value.is_none());
1609 }
1610
1611 #[test]
1615 fn test_pkg_meta_white_space() {
1616 let epub_file = Path::new("./test_case/pkg-meta-whitespace.epub");
1617 let doc = EpubDoc::new(epub_file);
1618 assert!(doc.is_ok());
1619
1620 let doc = doc.unwrap();
1621 let value = doc.get_metadata_value("creator");
1622 assert!(value.is_some());
1623 let value = value.unwrap();
1624 assert_eq!(value.len(), 1);
1625 assert_eq!(value, vec!["Dave Cramer"]);
1626
1627 let value = doc.get_metadata_value("description");
1628 assert!(value.is_some());
1629 let value = value.unwrap();
1630 assert_eq!(value.len(), 1);
1631 assert_eq!(
1632 value,
1633 vec![
1634 "The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases."
1635 ]
1636 );
1637 }
1638
1639 #[test]
1643 fn test_pkg_spine_duplicate_item_hyperlink() {
1644 let epub_file = Path::new("./test_case/pkg-spine-duplicate-item-hyperlink.epub");
1645 let doc = EpubDoc::new(epub_file);
1646 assert!(doc.is_ok());
1647
1648 let mut doc = doc.unwrap();
1649 assert_eq!(doc.spine.len(), 4);
1650 assert_eq!(
1651 doc.navigate_by_spine_index(0).unwrap(),
1652 doc.get_manifest_item("content_001").unwrap()
1653 );
1654 assert_eq!(
1655 doc.navigate_by_spine_index(1).unwrap(),
1656 doc.get_manifest_item("content_002").unwrap()
1657 );
1658 assert_eq!(
1659 doc.navigate_by_spine_index(2).unwrap(),
1660 doc.get_manifest_item("content_002").unwrap()
1661 );
1662 assert_eq!(
1663 doc.navigate_by_spine_index(3).unwrap(),
1664 doc.get_manifest_item("content_002").unwrap()
1665 );
1666 }
1667
1668 #[test]
1672 fn test_pkg_spine_duplicate_item_rendering() {
1673 let epub_file = Path::new("./test_case/pkg-spine-duplicate-item-rendering.epub");
1674 let doc = EpubDoc::new(epub_file);
1675 assert!(doc.is_ok());
1676
1677 let mut doc = doc.unwrap();
1678 assert_eq!(doc.spine.len(), 4);
1679
1680 let result = doc.spine_prev();
1681 assert!(result.is_none());
1682
1683 let result = doc.spine_next();
1684 assert!(result.is_some());
1685
1686 doc.spine_next();
1687 doc.spine_next();
1688 let result = doc.spine_next();
1689 assert!(result.is_none());
1690 }
1691
1692 #[test]
1696 fn test_pkg_spine_nonlinear_activation() {
1697 let epub_file = Path::new("./test_case/pkg-spine-nonlinear-activation.epub");
1698 let doc = EpubDoc::new(epub_file);
1699 assert!(doc.is_ok());
1700
1701 let mut doc = doc.unwrap();
1702 assert!(doc.spine_prev().is_none());
1703 assert!(doc.spine_next().is_none());
1704
1705 assert!(doc.navigate_by_spine_index(1).is_some());
1706 assert!(doc.spine_prev().is_none());
1707 assert!(doc.spine_next().is_none());
1708 }
1709
1710 #[test]
1714 fn test_pkg_spine_order() {
1715 let epub_file = Path::new("./test_case/pkg-spine-order.epub");
1716 let doc = EpubDoc::new(epub_file);
1717 assert!(doc.is_ok());
1718
1719 let doc = doc.unwrap();
1720 assert_eq!(doc.spine.len(), 4);
1721 assert_eq!(
1722 doc.spine
1723 .iter()
1724 .map(|item| item.idref.clone())
1725 .collect::<Vec<String>>(),
1726 vec![
1727 "d-content_001",
1728 "c-content_002",
1729 "b-content_003",
1730 "a-content_004",
1731 ]
1732 );
1733 }
1734
1735 #[test]
1739 fn test_spine_order_svg() {
1740 let epub_file = Path::new("./test_case/pkg-spine-order-svg.epub");
1741 let doc = EpubDoc::new(epub_file);
1742 assert!(doc.is_ok());
1743
1744 let mut doc = doc.unwrap();
1745 assert_eq!(doc.spine.len(), 4);
1746
1747 loop {
1748 if let Some(spine) = doc.spine_next() {
1749 let idref = doc.spine[doc.current_spine_index.load(Ordering::Relaxed)]
1750 .idref
1751 .clone();
1752 let resource = doc.get_manifest_item(&idref);
1753 assert!(resource.is_ok());
1754
1755 let resource = resource.unwrap();
1756 assert_eq!(spine, resource);
1757 } else {
1758 break;
1759 }
1760 }
1761
1762 assert_eq!(doc.current_spine_index.load(Ordering::Relaxed), 3);
1763 }
1764
1765 #[test]
1769 fn test_pkg_spine_unknown() {
1770 let epub_file = Path::new("./test_case/pkg-spine-unknown.epub");
1771 let doc = EpubDoc::new(epub_file);
1772 assert!(doc.is_ok());
1773
1774 let doc = doc.unwrap();
1775 assert_eq!(doc.spine.len(), 1);
1776 assert_eq!(doc.spine[0].idref, "content_001");
1777 assert_eq!(doc.spine[0].id, None);
1778 assert_eq!(doc.spine[0].linear, true);
1779 assert_eq!(doc.spine[0].properties, Some("untrustworthy".to_string()));
1780 }
1781
1782 #[test]
1786 fn test_pkg_title_order() {
1787 let epub_file = Path::new("./test_case/pkg-title-order.epub");
1788 let doc = EpubDoc::new(epub_file);
1789 assert!(doc.is_ok());
1790
1791 let doc = doc.unwrap();
1792 let title_list = doc.get_title();
1793 assert_eq!(title_list.len(), 6);
1794 assert_eq!(
1795 title_list,
1796 vec![
1797 "pkg-title-order",
1798 "This title must not display first",
1799 "Also, this title must not display first",
1800 "This title also must not display first",
1801 "This title must also not display first",
1802 "This title must not display first, also",
1803 ]
1804 );
1805 }
1806
1807 #[test]
1811 fn test_pkg_unique_id() {
1812 let epub_file = Path::new("./test_case/pkg-unique-id.epub");
1813 let doc_1 = EpubDoc::new(epub_file);
1814 assert!(doc_1.is_ok());
1815
1816 let epub_file = Path::new("./test_case/pkg-unique-id_duplicate.epub");
1817 let doc_2 = EpubDoc::new(epub_file);
1818 assert!(doc_2.is_ok());
1819
1820 let doc_1 = doc_1.unwrap();
1821 let doc_2 = doc_2.unwrap();
1822
1823 assert_eq!(doc_1.get_identifier(), doc_2.get_identifier());
1824 assert_eq!(doc_1.unique_identifier, "pkg-unique-id");
1825 assert_eq!(doc_2.unique_identifier, "pkg-unique-id");
1826 }
1827
1828 #[test]
1832 fn test_pkg_version_backward() {
1833 let epub_file = Path::new("./test_case/pkg-version-backward.epub");
1834 let doc = EpubDoc::new(epub_file);
1835 assert!(doc.is_ok());
1836
1837 let doc = doc.unwrap();
1838 assert_eq!(doc.version, EpubVersion::Version3_0);
1839 }
1840
1841 #[test]
1845 fn test_pkg_linked_records() {
1846 let epub_file = Path::new("./test_case/pkg-linked-records.epub");
1847 let doc = EpubDoc::new(epub_file);
1848 assert!(doc.is_ok());
1849
1850 let doc = doc.unwrap();
1851 assert_eq!(doc.metadata_link.len(), 3);
1852
1853 let item = doc.metadata_link.iter().find(|&item| {
1854 if let Some(properties) = &item.properties {
1855 properties.eq("onix")
1856 } else {
1857 false
1858 }
1859 });
1860 assert!(item.is_some());
1861 }
1862
1863 #[test]
1867 fn test_pkg_manifest_unlisted_resource() {
1868 let epub_file = Path::new("./test_case/pkg-manifest-unlisted-resource.epub");
1869 let doc = EpubDoc::new(epub_file);
1870 assert!(doc.is_ok());
1871
1872 let doc = doc.unwrap();
1873 assert!(
1874 doc.get_manifest_item_by_path("EPUB/content_001.xhtml")
1875 .is_ok()
1876 );
1877
1878 assert!(doc.get_manifest_item_by_path("EPUB/red.png").is_err());
1879 let err = doc.get_manifest_item_by_path("EPUB/red.png").unwrap_err();
1880 assert_eq!(
1881 err.to_string(),
1882 "Resource not found: Unable to find resource from \"EPUB/red.png\"."
1883 );
1884 }
1885 }
1886
1887 mod manifest_fallbacks_tests {
1891 use std::path::Path;
1892
1893 use crate::epub::EpubDoc;
1894
1895 #[test]
1899 fn test_pub_foreign_bad_fallback() {
1900 let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
1901 let doc = EpubDoc::new(epub_file);
1902 assert!(doc.is_ok());
1903
1904 let doc = doc.unwrap();
1905 assert!(doc.get_manifest_item("content_001").is_ok());
1906 assert!(doc.get_manifest_item("bar").is_ok());
1907
1908 assert_eq!(
1909 doc.get_manifest_item_with_fallback("content_001", &vec!["application/xhtml+xml"])
1910 .unwrap_err()
1911 .to_string(),
1912 "No supported file format: The fallback resource does not contain the file format you support."
1913 );
1914 }
1915
1916 #[test]
1920 fn test_pub_foreign_image() {
1921 let epub_file = Path::new("./test_case/pub-foreign_image.epub");
1922 let doc = EpubDoc::new(epub_file);
1923 assert!(doc.is_ok());
1924
1925 let doc = doc.unwrap();
1926 let result = doc.get_manifest_item_with_fallback(
1927 "image-tiff",
1928 &vec!["image/png", "application/xhtml+xml"],
1929 );
1930 assert!(result.is_ok());
1931
1932 let (_, mime) = result.unwrap();
1933 assert_eq!(mime, "image/png");
1934 }
1935
1936 #[test]
1940 fn test_pub_foreign_json_spine() {
1941 let epub_file = Path::new("./test_case/pub-foreign_json-spine.epub");
1942 let doc = EpubDoc::new(epub_file);
1943 assert!(doc.is_ok());
1944
1945 let doc = doc.unwrap();
1946 let result = doc.get_manifest_item_with_fallback(
1947 "content_primary",
1948 &vec!["application/xhtml+xml", "application/json"],
1949 );
1950 assert!(result.is_ok());
1951 let (_, mime) = result.unwrap();
1952 assert_eq!(mime, "application/json");
1953
1954 let result = doc
1955 .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
1956 assert!(result.is_ok());
1957 let (_, mime) = result.unwrap();
1958 assert_eq!(mime, "application/xhtml+xml");
1959 }
1960
1961 #[test]
1965 fn test_pub_foreign_xml_spine() {
1966 let epub_file = Path::new("./test_case/pub-foreign_xml-spine.epub");
1967 let doc = EpubDoc::new(epub_file);
1968 assert!(doc.is_ok());
1969
1970 let doc = doc.unwrap();
1971 let result = doc.get_manifest_item_with_fallback(
1972 "content_primary",
1973 &vec!["application/xhtml+xml", "application/xml"],
1974 );
1975 assert!(result.is_ok());
1976 let (_, mime) = result.unwrap();
1977 assert_eq!(mime, "application/xml");
1978
1979 let result = doc
1980 .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
1981 assert!(result.is_ok());
1982 let (_, mime) = result.unwrap();
1983 assert_eq!(mime, "application/xhtml+xml");
1984 }
1985
1986 #[test]
1990 fn test_pub_foreign_xml_suffix_spine() {
1991 let epub_file = Path::new("./test_case/pub-foreign_xml-suffix-spine.epub");
1992 let doc = EpubDoc::new(epub_file);
1993 assert!(doc.is_ok());
1994
1995 let doc = doc.unwrap();
1996 let result = doc.get_manifest_item_with_fallback(
1997 "content_primary",
1998 &vec!["application/xhtml+xml", "application/dtc+xml"],
1999 );
2000 assert!(result.is_ok());
2001 let (_, mime) = result.unwrap();
2002 assert_eq!(mime, "application/dtc+xml");
2003
2004 let result = doc
2005 .get_manifest_item_with_fallback("content_primary", &vec!["application/xhtml+xml"]);
2006 assert!(result.is_ok());
2007 let (_, mime) = result.unwrap();
2008 assert_eq!(mime, "application/xhtml+xml");
2009 }
2010 }
2011
2012 mod open_container_format_tests {
2014 use std::{cmp::min, io::Read, path::Path};
2015
2016 use sha1::{Digest, Sha1};
2017
2018 use crate::epub::EpubDoc;
2019
2020 #[test]
2024 fn test_ocf_metainf_inc() {
2025 let epub_file = Path::new("./test_case/ocf-metainf-inc.epub");
2026 let doc = EpubDoc::new(epub_file);
2027 assert!(doc.is_ok());
2028 }
2029
2030 #[test]
2034 fn test_ocf_metainf_manifest() {
2035 let epub_file = Path::new("./test_case/ocf-metainf-manifest.epub");
2036 let doc = EpubDoc::new(epub_file);
2037 assert!(doc.is_ok());
2038 }
2039
2040 #[test]
2044 fn test_ocf_package_arbitrary() {
2045 let epub_file = Path::new("./test_case/ocf-package_arbitrary.epub");
2046 let doc = EpubDoc::new(epub_file);
2047 assert!(doc.is_ok());
2048
2049 let doc = doc.unwrap();
2050 assert_eq!(doc.package_path, Path::new("FOO/BAR/package.opf"));
2051 }
2052
2053 #[test]
2057 fn test_ocf_package_multiple() {
2058 let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
2059 let doc = EpubDoc::new(epub_file);
2060 assert!(doc.is_ok());
2061
2062 let doc = doc.unwrap();
2063 assert_eq!(doc.package_path, Path::new("FOO/BAR/package.opf"));
2064 assert_eq!(doc.base_path, Path::new("FOO/BAR"));
2065 }
2066
2067 #[test]
2071 fn test_ocf_url_link_leaking_relative() {
2072 let epub_file = Path::new("./test_case/ocf-url_link-leaking-relative.epub");
2073 let doc = EpubDoc::new(epub_file);
2074 assert!(doc.is_err());
2075 assert_eq!(
2076 doc.err().unwrap().to_string(),
2077 String::from(
2078 "Relative link leakage: Path \"../../../../media/imgs/monastery.jpg\" is out of container range."
2079 )
2080 )
2081 }
2082
2083 #[test]
2087 fn test_ocf_url_link_path_absolute() {
2088 let epub_file = Path::new("./test_case/ocf-url_link-path-absolute.epub");
2089 let doc = EpubDoc::new(epub_file);
2090 assert!(doc.is_ok());
2091
2092 let doc = doc.unwrap();
2093 let resource = doc.manifest.get("photo").unwrap();
2094 assert_eq!(resource.path, Path::new("media/imgs/monastery.jpg"));
2095 }
2096
2097 #[test]
2101 fn test_ocf_url_link_relative() {
2102 let epub_file = Path::new("./test_case/ocf-url_link-relative.epub");
2103 let doc = EpubDoc::new(epub_file);
2104 assert!(doc.is_ok());
2105
2106 let doc = doc.unwrap();
2107 let resource = doc.manifest.get("photo").unwrap();
2108 assert_eq!(resource.path, Path::new("media/imgs/monastery.jpg"));
2109 }
2110
2111 #[test]
2115 fn test_ocf_url_manifest() {
2116 let epub_file = Path::new("./test_case/ocf-url_manifest.epub");
2117 let doc = EpubDoc::new(epub_file);
2118 assert!(doc.is_ok());
2119
2120 let doc = doc.unwrap();
2121 assert!(doc.get_manifest_item("nav").is_ok());
2122 assert!(doc.get_manifest_item("content_001").is_ok());
2123 assert!(doc.get_manifest_item("content_002").is_err());
2124 }
2125
2126 #[test]
2130 fn test_ocf_url_relative() {
2131 let epub_file = Path::new("./test_case/ocf-url_relative.epub");
2132 let doc = EpubDoc::new(epub_file);
2133 assert!(doc.is_ok());
2134
2135 let doc = doc.unwrap();
2136 assert_eq!(doc.package_path, Path::new("foo/BAR/baz.opf"));
2137 assert_eq!(doc.base_path, Path::new("foo/BAR"));
2138 assert_eq!(
2139 doc.manifest.get("nav").unwrap().path,
2140 Path::new("foo/BAR/nav.xhtml")
2141 );
2142 assert_eq!(
2143 doc.manifest.get("content_001").unwrap().path,
2144 Path::new("foo/BAR/qux/content_001.xhtml")
2145 );
2146 assert!(doc.get_manifest_item("nav").is_ok());
2147 assert!(doc.get_manifest_item("content_001").is_ok());
2148 }
2149
2150 #[test]
2155 fn test_ocf_zip_comp() {
2156 let epub_file = Path::new("./test_case/ocf-zip-comp.epub");
2157 let doc = EpubDoc::new(epub_file);
2158 assert!(doc.is_ok());
2159 }
2160
2161 #[test]
2166 fn test_ocf_zip_mult() {
2167 let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2168 let doc = EpubDoc::new(epub_file);
2169 assert!(doc.is_ok());
2170 }
2171
2172 #[test]
2176 fn test_ocf_font_obfuscation() {
2177 let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2178 let doc = EpubDoc::new(epub_file);
2179 assert!(doc.is_ok());
2180
2181 let doc = doc.unwrap();
2182 let unique_id = doc.unique_identifier.clone();
2183
2184 let mut hasher = Sha1::new();
2185 hasher.update(unique_id.as_bytes());
2186 let hash = hasher.finalize();
2187 let mut key = vec![0u8; 1040];
2188 for i in 0..1040 {
2189 key[i] = hash[i % hash.len()];
2190 }
2191
2192 assert!(doc.encryption.is_some());
2193 assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2194
2195 let data = &doc.encryption.unwrap()[0];
2196 assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2197
2198 let font_file = doc
2199 .archive
2200 .lock()
2201 .unwrap()
2202 .by_name(&data.data)
2203 .unwrap()
2204 .bytes()
2205 .collect::<Result<Vec<u8>, _>>();
2206 assert!(font_file.is_ok());
2207 let font_file = font_file.unwrap();
2208
2209 let mut deobfuscated = font_file.clone();
2211 for i in 0..min(1040, deobfuscated.len()) {
2212 deobfuscated[i] ^= key[i];
2213 }
2214
2215 assert!(is_valid_font(&deobfuscated));
2216 }
2217
2218 #[test]
2222 fn test_ocf_font_obfuscation_bis() {
2223 let epub_file = Path::new("./test_case/ocf-font_obfuscation_bis.epub");
2224 let doc = EpubDoc::new(epub_file);
2225 assert!(doc.is_ok());
2226
2227 let doc = doc.unwrap();
2228
2229 let wrong_unique_id = "wrong-publication-id";
2230 let mut hasher = Sha1::new();
2231 hasher.update(wrong_unique_id.as_bytes());
2232 let hash = hasher.finalize();
2233 let mut wrong_key = vec![0u8; 1040];
2234 for i in 0..1040 {
2235 wrong_key[i] = hash[i % hash.len()];
2236 }
2237
2238 assert!(doc.encryption.is_some());
2239 assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2240
2241 let data = &doc.encryption.unwrap()[0];
2242 assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2243
2244 let font_file = doc
2245 .archive
2246 .lock()
2247 .unwrap()
2248 .by_name(&data.data)
2249 .unwrap()
2250 .bytes()
2251 .collect::<Result<Vec<u8>, _>>();
2252 assert!(font_file.is_ok());
2253 let font_file = font_file.unwrap();
2254
2255 let mut deobfuscated_with_wrong_key = font_file.clone();
2257 for i in 0..std::cmp::min(1040, deobfuscated_with_wrong_key.len()) {
2258 deobfuscated_with_wrong_key[i] ^= wrong_key[i];
2259 }
2260
2261 assert!(!is_valid_font(&deobfuscated_with_wrong_key));
2262 }
2263
2264 fn is_valid_font(data: &[u8]) -> bool {
2265 if data.len() < 4 {
2266 return false;
2267 }
2268 let sig = &data[0..4];
2269 sig == b"OTTO"
2272 || sig == b"\x00\x01\x00\x00"
2273 || sig == b"\x00\x02\x00\x00"
2274 || sig == b"true"
2275 || sig == b"typ1"
2276 }
2277 }
2278
2279 #[test]
2280 fn test_parse_container() {
2281 let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2282 let doc = EpubDoc::new(epub_file);
2283 assert!(doc.is_ok());
2284
2285 let container = r#"
2287 <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2288 <rootfiles></rootfiles>
2289 </container>
2290 "#
2291 .to_string();
2292
2293 let result = EpubDoc::<BufReader<File>>::parse_container(container);
2294 assert!(result.is_err());
2295 assert_eq!(
2296 result.unwrap_err(),
2297 EpubError::NonCanonicalFile { tag: "rootfile".to_string() }
2298 );
2299
2300 let container = r#"
2301 <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2302 <rootfiles>
2303 <rootfile media-type="application/oebps-package+xml"/>
2304 </rootfiles>
2305 </container>
2306 "#
2307 .to_string();
2308
2309 let result = EpubDoc::<BufReader<File>>::parse_container(container);
2310 assert!(result.is_err());
2311 assert_eq!(
2312 result.unwrap_err(),
2313 EpubError::MissingRequiredAttribute {
2314 tag: "rootfile".to_string(),
2315 attribute: "full-path".to_string(),
2316 }
2317 );
2318
2319 let container = r#"
2320 <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2321 <rootfiles>
2322 <rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
2323 </rootfiles>
2324 </container>
2325 "#
2326 .to_string();
2327
2328 let result = EpubDoc::<BufReader<File>>::parse_container(container);
2329 assert!(result.is_ok());
2330 assert_eq!(result.unwrap(), PathBuf::from("EPUB/content.opf"))
2331 }
2332
2333 #[test]
2334 fn test_parse_manifest() {
2335 let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
2336 let doc = EpubDoc::new(epub_file);
2337 assert!(doc.is_ok());
2338
2339 let manifest = r#"
2340 <manifest>
2341 <item href="content_001.xhtml" media-type="application/xhtml+xml"/>
2342 <item properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2343 </manifest>
2344 "#;
2345 let mut doc = doc.unwrap();
2346 let element = XmlReader::parse(manifest);
2347 assert!(element.is_ok());
2348
2349 let element = element.unwrap();
2350 let result = doc.parse_manifest(&element);
2351 assert!(result.is_err());
2352 assert_eq!(
2353 result.unwrap_err(),
2354 EpubError::MissingRequiredAttribute {
2355 tag: "item".to_string(),
2356 attribute: "id".to_string(),
2357 },
2358 );
2359
2360 let manifest = r#"
2361 <manifest>
2362 <item id="content_001" media-type="application/xhtml+xml"/>
2363 <item id="nav" properties="nav" media-type="application/xhtml+xml"/>
2364 </manifest>
2365 "#;
2366 let element = XmlReader::parse(manifest);
2367 assert!(element.is_ok());
2368
2369 let element = element.unwrap();
2370 let result = doc.parse_manifest(&element);
2371 assert!(result.is_err());
2372 assert_eq!(
2373 result.unwrap_err(),
2374 EpubError::MissingRequiredAttribute {
2375 tag: "item".to_string(),
2376 attribute: "href".to_string(),
2377 },
2378 );
2379
2380 let manifest = r#"
2381 <manifest>
2382 <item id="content_001" href="content_001.xhtml"/>
2383 <item id="nav" properties="nav" href="nav.xhtml"/>
2384 </manifest>
2385 "#;
2386 let element = XmlReader::parse(manifest);
2387 assert!(element.is_ok());
2388
2389 let element = element.unwrap();
2390 let result = doc.parse_manifest(&element);
2391 assert!(result.is_err());
2392 assert_eq!(
2393 result.unwrap_err(),
2394 EpubError::MissingRequiredAttribute {
2395 tag: "item".to_string(),
2396 attribute: "media-type".to_string(),
2397 },
2398 );
2399
2400 let manifest = r#"
2401 <manifest>
2402 <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
2403 <item id="nav" properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2404 </manifest>
2405 "#;
2406 let element = XmlReader::parse(manifest);
2407 assert!(element.is_ok());
2408
2409 let element = element.unwrap();
2410 let result = doc.parse_manifest(&element);
2411 assert!(result.is_ok());
2412 }
2413
2414 #[test]
2416 fn test_fn_has_encryption() {
2417 let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2418 let doc = EpubDoc::new(epub_file);
2419 assert!(doc.is_ok());
2420
2421 let doc = doc.unwrap();
2422 assert!(doc.has_encryption());
2423 }
2424
2425 #[test]
2427 fn test_fn_parse_encryption() {
2428 let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2429 let doc = EpubDoc::new(epub_file);
2430 assert!(doc.is_ok());
2431
2432 let doc = doc.unwrap();
2433 assert!(doc.encryption.is_some());
2434
2435 let encryption = doc.encryption.unwrap();
2436 assert_eq!(encryption.len(), 1);
2437 assert_eq!(encryption[0].method, "http://www.idpf.org/2008/embedding");
2438 assert_eq!(encryption[0].data, "EPUB/fonts/Lobster.ttf");
2439 }
2440
2441 #[test]
2442 fn test_get_metadata_existing_key() {
2443 let epub_file = Path::new("./test_case/epub-33.epub");
2444 let doc = EpubDoc::new(epub_file);
2445 assert!(doc.is_ok());
2446
2447 let doc = doc.unwrap();
2448
2449 let titles = doc.get_metadata("title");
2450 assert!(titles.is_some());
2451
2452 let titles = titles.unwrap();
2453 assert_eq!(titles.len(), 1);
2454 assert_eq!(titles[0].property, "title");
2455 assert_eq!(titles[0].value, "EPUB 3.3");
2456
2457 let languages = doc.get_metadata("language");
2458 assert!(languages.is_some());
2459
2460 let languages = languages.unwrap();
2461 assert_eq!(languages.len(), 1);
2462 assert_eq!(languages[0].property, "language");
2463 assert_eq!(languages[0].value, "en-us");
2464
2465 let language = doc.get_language();
2466 assert_eq!(language, vec!["en-us"]);
2467 }
2468
2469 #[test]
2470 fn test_get_metadata_nonexistent_key() {
2471 let epub_file = Path::new("./test_case/epub-33.epub");
2472 let doc = EpubDoc::new(epub_file);
2473 assert!(doc.is_ok());
2474
2475 let doc = doc.unwrap();
2476 let metadata = doc.get_metadata("nonexistent");
2477 assert!(metadata.is_none());
2478 }
2479
2480 #[test]
2481 fn test_get_metadata_multiple_items_same_type() {
2482 let epub_file = Path::new("./test_case/epub-33.epub");
2483 let doc = EpubDoc::new(epub_file);
2484 assert!(doc.is_ok());
2485
2486 let doc = doc.unwrap();
2487
2488 let creators = doc.get_metadata("creator");
2489 assert!(creators.is_some());
2490
2491 let creators = creators.unwrap();
2492 assert_eq!(creators.len(), 3);
2493
2494 assert_eq!(creators[0].id, Some("creator_id_0".to_string()));
2495 assert_eq!(creators[0].property, "creator");
2496 assert_eq!(creators[0].value, "Matt Garrish, DAISY Consortium");
2497
2498 assert_eq!(creators[1].id, Some("creator_id_1".to_string()));
2499 assert_eq!(creators[1].property, "creator");
2500 assert_eq!(creators[1].value, "Ivan Herman, W3C");
2501
2502 assert_eq!(creators[2].id, Some("creator_id_2".to_string()));
2503 assert_eq!(creators[2].property, "creator");
2504 assert_eq!(creators[2].value, "Dave Cramer, Invited Expert");
2505 }
2506
2507 #[test]
2508 fn test_get_metadata_with_refinement() {
2509 let epub_file = Path::new("./test_case/epub-33.epub");
2510 let doc = EpubDoc::new(epub_file);
2511 assert!(doc.is_ok());
2512
2513 let doc = doc.unwrap();
2514
2515 let title = doc.get_metadata("title");
2516 assert!(title.is_some());
2517
2518 let title = title.unwrap();
2519 assert_eq!(title.len(), 1);
2520 assert_eq!(title[0].refined.len(), 1);
2521 assert_eq!(title[0].refined[0].property, "title-type");
2522 assert_eq!(title[0].refined[0].value, "main");
2523 }
2524
2525 #[test]
2526 fn test_get_manifest_item_with_fallback() {
2527 let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
2528 let doc = EpubDoc::new(epub_file);
2529 assert!(doc.is_ok());
2530
2531 let doc = doc.unwrap();
2532 assert!(doc.get_manifest_item("content_001").is_ok());
2533 assert!(doc.get_manifest_item("bar").is_ok());
2534
2535 if let Ok((_, mime)) =
2537 doc.get_manifest_item_with_fallback("content_001", &vec!["image/psd"])
2538 {
2539 assert_eq!(mime, "image/psd");
2540 } else {
2541 assert!(false, "get_manifest_item_with_fallback failed");
2542 }
2543
2544 assert_eq!(
2546 doc.get_manifest_item_with_fallback("content_001", &vec!["application/xhtml+xml"])
2547 .unwrap_err()
2548 .to_string(),
2549 "No supported file format: The fallback resource does not contain the file format you support."
2550 );
2551 }
2552
2553 #[test]
2554 fn test_get_cover() {
2555 let epub_file = Path::new("./test_case/pkg-cover-image.epub");
2556 let doc = EpubDoc::new(epub_file);
2557 if let Err(err) = &doc {
2558 println!("{}", err);
2559 }
2560 assert!(doc.is_ok());
2561
2562 let doc = doc.unwrap();
2563 let result = doc.get_cover();
2564 assert!(result.is_some());
2565
2566 let (data, mime) = result.unwrap();
2567 assert_eq!(data.len(), 5785);
2568 assert_eq!(mime, "image/jpeg");
2569 }
2570
2571 #[test]
2572 fn test_epub_2() {
2573 let epub_file = Path::new("./test_case/epub-2.epub");
2574 let doc = EpubDoc::new(epub_file);
2575 assert!(doc.is_ok());
2576
2577 let doc = doc.unwrap();
2578
2579 let titles = doc.get_title();
2580 assert_eq!(titles, vec!["Minimal EPUB 2.0"]);
2581 }
2582
2583 #[test]
2584 fn test_is_valid_epub_valid_file() {
2585 let result = EpubDoc::is_valid_epub("./test_case/epub-2.epub");
2586 assert!(result.is_ok());
2587 assert_eq!(result.unwrap(), true);
2588 }
2589
2590 #[test]
2591 fn test_is_valid_epub_invalid_path() {
2592 let result = EpubDoc::is_valid_epub("./test_case/nonexistent.epub");
2593 assert!(result.is_err());
2594 }
2595
2596 #[test]
2597 fn test_is_valid_epub_corrupted_zip() {
2598 let temp_dir = std::env::temp_dir();
2599 let corrupted_file = temp_dir.join("corrupted.epub");
2600
2601 std::fs::write(&corrupted_file, b"not a valid zip file").unwrap();
2602
2603 let result = EpubDoc::is_valid_epub(&corrupted_file);
2604
2605 assert!(result.is_err());
2606 let err = result.unwrap_err();
2607 assert!(matches!(err, EpubError::ArchiveError { .. }));
2608
2609 std::fs::remove_file(corrupted_file).ok();
2610 }
2611
2612 #[test]
2613 fn test_is_valid_epub_valid_epub_3() {
2614 let result = EpubDoc::is_valid_epub("./test_case/epub-33.epub");
2615 assert!(result.is_ok());
2616 assert_eq!(result.unwrap(), true);
2617 }
2618
2619 #[test]
2620 fn test_is_outside_error() {
2621 let archive_error = EpubError::ArchiveError {
2622 source: zip::result::ZipError::Io(std::io::Error::new(
2623 std::io::ErrorKind::Other,
2624 "test",
2625 )),
2626 };
2627 assert!(EpubDoc::<BufReader<File>>::is_outside_error(&archive_error));
2628
2629 let io_error = EpubError::IOError {
2630 source: std::io::Error::new(std::io::ErrorKind::NotFound, "test"),
2631 };
2632 assert!(EpubDoc::<BufReader<File>>::is_outside_error(&io_error));
2633
2634 let non_canonical = EpubError::NonCanonicalEpub { expected_file: "test".to_string() };
2635 assert!(!EpubDoc::<BufReader<File>>::is_outside_error(
2636 &non_canonical
2637 ));
2638
2639 let missing_attr = EpubError::MissingRequiredAttribute {
2640 tag: "test".to_string(),
2641 attribute: "id".to_string(),
2642 };
2643 assert!(!EpubDoc::<BufReader<File>>::is_outside_error(&missing_attr));
2644 }
2645}