1use std::{
26 collections::HashMap,
27 fs::{File, canonicalize},
28 io::{BufReader, Read, Seek},
29 path::{Path, PathBuf},
30};
31
32use log::warn;
33use zip::{ZipArchive, result::ZipError};
34
35use crate::{
36 error::EpubError,
37 types::{
38 EncryptionData, EpubVersion, ManifestItem, MetadataItem, MetadataLinkItem,
39 MetadataRefinement, NavPoint, SpineItem,
40 },
41 utils::{
42 DecodeBytes, NormalizeWhitespace, XmlElement, XmlReader, adobe_font_dencryption,
43 check_realtive_link_leakage, compression_method_check, get_file_in_zip_archive,
44 idpf_font_dencryption,
45 },
46};
47
48pub struct EpubDoc<R: Read + Seek> {
69 pub(crate) archive: ZipArchive<R>,
71
72 pub(crate) epub_path: PathBuf,
74
75 pub package_path: PathBuf,
77
78 pub base_path: PathBuf,
80
81 pub version: EpubVersion,
83
84 pub unique_identifier: String,
88
89 pub metadata: Vec<MetadataItem>,
91
92 pub metadata_link: Vec<MetadataLinkItem>,
94
95 pub manifest: HashMap<String, ManifestItem>,
100
101 pub spine: Vec<SpineItem>,
106
107 pub encryption: Option<Vec<EncryptionData>>,
109
110 pub catalog: Vec<NavPoint>,
112
113 pub catalog_title: String,
115
116 pub current_spine_index: usize,
118}
119
120impl<R: Read + Seek> EpubDoc<R> {
121 pub fn from_reader(reader: R, epub_path: PathBuf) -> Result<Self, EpubError> {
140 let mut archive = ZipArchive::new(reader).map_err(EpubError::from)?;
150 let epub_path = canonicalize(epub_path)?;
151
152 compression_method_check(&mut archive)?;
153
154 let container =
155 get_file_in_zip_archive(&mut archive, "META-INF/container.xml")?.decode()?;
156 let package_path = Self::parse_container(container)?;
157 let base_path = package_path
158 .parent()
159 .expect("所有文件的父目录不能为空")
160 .to_path_buf();
161
162 let opf_file =
163 get_file_in_zip_archive(&mut archive, package_path.to_str().unwrap())?.decode()?;
164 let package = XmlReader::parse(&opf_file)?;
165 let version = Self::determine_epub_version(&package)?;
169
170 let mut doc = Self {
171 archive,
172 epub_path,
173 package_path,
174 base_path,
175 version,
176 unique_identifier: String::new(),
177 metadata: vec![],
178 metadata_link: vec![],
179 manifest: HashMap::new(),
180 spine: vec![],
181 encryption: None,
182 catalog: vec![],
183 catalog_title: String::new(),
184 current_spine_index: 0,
185 };
186
187 let metadata_element = package.find_elements_by_name("metadata").next().unwrap();
188 let manifest_element = package.find_elements_by_name("manifest").next().unwrap();
189 let spine_element = package.find_elements_by_name("spine").next().unwrap();
190
191 doc.parse_metadata(metadata_element)?;
192 doc.parse_manifest(manifest_element)?;
193 doc.parse_spine(spine_element)?;
194 doc.parse_encryption()?;
195 doc.parse_catalog()?;
196
197 doc.unique_identifier = if let Some(uid) = package.get_attr("unique-identifier") {
199 doc.metadata.iter().find(|item| {
200 item.property == "identifier" && item.id.as_ref().is_some_and(|id| id == &uid)
201 })
202 } else {
203 doc.metadata
204 .iter()
205 .find(|item| item.property == "identifier")
206 }
207 .map(|item| item.value.clone())
208 .ok_or_else(|| EpubError::NonCanonicalFile {
209 tag: "dc:identifier".to_string(),
210 })?;
211
212 Ok(doc)
213 }
214
215 fn parse_container(data: String) -> Result<PathBuf, EpubError> {
231 let root = XmlReader::parse(&data)?;
232 let rootfile = root
233 .find_elements_by_name("rootfile")
234 .next()
235 .ok_or_else(|| EpubError::NonCanonicalFile {
236 tag: "rootfile".to_string(),
237 })?;
238
239 let attr =
240 rootfile
241 .get_attr("full-path")
242 .ok_or_else(|| EpubError::MissingRequiredAttribute {
243 tag: "rootfile".to_string(),
244 attribute: "full-path".to_string(),
245 })?;
246
247 Ok(PathBuf::from(attr))
248 }
249
250 fn parse_metadata(&mut self, metadata_element: &XmlElement) -> Result<(), EpubError> {
261 const DC_NAMESPACE: &str = "http://purl.org/dc/elements/1.1/";
262 const OPF_NAMESPACE: &str = "http://www.idpf.org/2007/opf";
263
264 let mut metadata = Vec::new();
265 let mut metadata_link = Vec::new();
266 let mut refinements = HashMap::<String, Vec<MetadataRefinement>>::new();
267
268 for element in metadata_element.children() {
269 match &element.namespace {
270 Some(namespace) if namespace == DC_NAMESPACE => {
271 self.parse_dc_metadata(element, &mut metadata)?
272 }
273
274 Some(namespace) if namespace == OPF_NAMESPACE => self.parse_opf_metadata(
275 element,
276 &mut metadata,
277 &mut metadata_link,
278 &mut refinements,
279 )?,
280
281 _ => {}
282 };
283 }
284
285 for item in metadata.iter_mut() {
286 if let Some(id) = &item.id {
287 if let Some(refinements) = refinements.remove(id) {
288 item.refined = refinements;
289 }
290 }
291 }
292
293 self.metadata = metadata;
294 self.metadata_link = metadata_link;
295 Ok(())
296 }
297
298 fn parse_manifest(&mut self, manifest_element: &XmlElement) -> Result<(), EpubError> {
308 let estimated_items = manifest_element.children().count();
309 let mut resources = HashMap::with_capacity(estimated_items);
310
311 for element in manifest_element.children() {
312 let id = element
313 .get_attr("id")
314 .ok_or_else(|| EpubError::MissingRequiredAttribute {
315 tag: element.tag_name(),
316 attribute: "id".to_string(),
317 })?
318 .to_string();
319 let path = element
320 .get_attr("href")
321 .ok_or_else(|| EpubError::MissingRequiredAttribute {
322 tag: element.tag_name(),
323 attribute: "href".to_string(),
324 })?
325 .to_string();
326 let mime = element
327 .get_attr("media-type")
328 .ok_or_else(|| EpubError::MissingRequiredAttribute {
329 tag: element.tag_name(),
330 attribute: "media-type".to_string(),
331 })?
332 .to_string();
333 let properties = element.get_attr("properties");
334 let fallback = element.get_attr("fallback");
335
336 resources.insert(
337 id.clone(),
338 ManifestItem {
339 id,
340 path: self.normalize_manifest_path(&path)?,
341 mime,
342 properties,
343 fallback,
344 },
345 );
346 }
347
348 self.manifest = resources;
349 self.validate_fallback_chains();
350 Ok(())
351 }
352
353 fn parse_spine(&mut self, spine_element: &XmlElement) -> Result<(), EpubError> {
363 let mut spine = Vec::new();
364 for element in spine_element.children() {
365 let idref = element
366 .get_attr("idref")
367 .ok_or_else(|| EpubError::MissingRequiredAttribute {
368 tag: element.tag_name(),
369 attribute: "idref".to_string(),
370 })?
371 .to_string();
372 let id = element.get_attr("id");
373 let linear = element
374 .get_attr("linear")
375 .map(|linear| linear == "yes")
376 .unwrap_or(true);
377 let properties = element.get_attr("properties");
378
379 spine.push(SpineItem {
380 idref,
381 id,
382 linear,
383 properties,
384 });
385 }
386
387 self.spine = spine;
388 Ok(())
389 }
390
391 fn parse_encryption(&mut self) -> Result<(), EpubError> {
401 if !self.has_encryption() {
402 return Ok(());
403 }
404
405 let encryption_file =
406 get_file_in_zip_archive(&mut self.archive, "META-INF/encryption.xml")?.decode()?;
407
408 let root = XmlReader::parse(&encryption_file)?;
409
410 let mut encryption_data = Vec::new();
411 for data in root.children() {
412 if data.name != "EncryptedData" {
413 continue;
414 }
415
416 let method = data
417 .find_elements_by_name("EncryptionMethod")
418 .next()
419 .ok_or_else(|| EpubError::NonCanonicalFile {
420 tag: "EncryptionMethod".to_string(),
421 })?;
422 let reference = data
423 .find_elements_by_name("CipherReference")
424 .next()
425 .ok_or_else(|| EpubError::NonCanonicalFile {
426 tag: "CipherReference".to_string(),
427 })?;
428
429 encryption_data.push(EncryptionData {
430 method: method
431 .get_attr("Algorithm")
432 .ok_or_else(|| EpubError::MissingRequiredAttribute {
433 tag: "EncryptionMethod".to_string(),
434 attribute: "Algorithm".to_string(),
435 })?
436 .to_string(),
437 data: reference
438 .get_attr("URI")
439 .ok_or_else(|| EpubError::MissingRequiredAttribute {
440 tag: "CipherReference".to_string(),
441 attribute: "URI".to_string(),
442 })?
443 .to_string(),
444 });
445 }
446
447 if !encryption_data.is_empty() {
448 self.encryption = Some(encryption_data);
449 }
450
451 Ok(())
452 }
453
454 fn parse_catalog(&mut self) -> Result<(), EpubError> {
461 const HEAD_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];
462
463 match self.version {
464 EpubVersion::Version2_0 => {
465 let opf_file = get_file_in_zip_archive(
466 &mut self.archive,
467 self.package_path.to_str().unwrap(),
468 )?
469 .decode()?;
470 let opf_element = XmlReader::parse(&opf_file)?;
471
472 let toc_id = opf_element
473 .find_children_by_name("spine")
474 .next()
475 .ok_or_else(|| EpubError::NonCanonicalFile {
476 tag: "spine".to_string(),
477 })?
478 .get_attr("toc")
479 .ok_or_else(|| EpubError::MissingRequiredAttribute {
480 tag: "spine".to_string(),
481 attribute: "toc".to_string(),
482 })?
483 .to_owned();
484 let toc_path = self
485 .manifest
486 .get(&toc_id)
487 .ok_or(EpubError::ResourceIdNotExist { id: toc_id })?
488 .path
489 .to_str()
490 .unwrap();
491
492 let ncx_file = get_file_in_zip_archive(&mut self.archive, toc_path)?.decode()?;
493 let ncx = XmlReader::parse(&ncx_file)?;
494
495 match ncx.find_elements_by_name("docTitle").next() {
496 Some(element) => self.catalog_title = element.text(),
497 None => warn!(
498 "Expecting to get docTitle information from the ncx file, but it's missing."
499 ),
500 };
501
502 let nav_map = ncx.find_elements_by_name("navMap").next().ok_or_else(|| {
503 EpubError::NonCanonicalFile {
504 tag: "navMap".to_string(),
505 }
506 })?;
507
508 self.catalog = self.parse_nav_points(nav_map)?;
509
510 Ok(())
511 }
512
513 EpubVersion::Version3_0 => {
514 let nav_path = self
515 .manifest
516 .values()
517 .find(|item| {
518 if let Some(property) = &item.properties {
519 return property.contains("nav");
520 }
521 false
522 })
523 .map(|item| item.path.clone())
524 .ok_or_else(|| EpubError::NonCanonicalEpub {
525 expected_file: "Navigation Document".to_string(),
526 })?;
527
528 let nav_file =
529 get_file_in_zip_archive(&mut self.archive, nav_path.to_str().unwrap())?
530 .decode()?;
531
532 let nav_element = XmlReader::parse(&nav_file)?;
533 let nav = nav_element
534 .find_elements_by_name("nav")
535 .find(|&element| element.get_attr("epub:type") == Some(String::from("toc")))
536 .ok_or_else(|| EpubError::NonCanonicalFile {
537 tag: "nav".to_string(),
538 })?;
539 let nav_title = nav.find_children_by_names(&HEAD_TAGS).next();
540 let nav_list = nav.find_children_by_name("ol").next().ok_or_else(|| {
541 EpubError::NonCanonicalFile {
542 tag: "ol".to_string(),
543 }
544 })?;
545
546 self.catalog = self.parse_catalog_list(nav_list)?;
547 if let Some(nav_title) = nav_title {
548 self.catalog_title = nav_title.text();
549 };
550 Ok(())
551 }
552 }
553 }
554
555 pub fn has_encryption(&mut self) -> bool {
571 self.archive
572 .by_path(Path::new("META-INF/encryption.xml"))
573 .is_ok()
574 }
575
576 pub fn get_metadata(&self, key: &str) -> Option<Vec<MetadataItem>> {
590 let metadatas = self
591 .metadata
592 .iter()
593 .filter(|item| item.property == key)
594 .cloned()
595 .collect::<Vec<MetadataItem>>();
596
597 (!metadatas.is_empty()).then_some(metadatas)
598 }
599
600 pub fn get_metadata_value(&self, key: &str) -> Option<Vec<String>> {
612 let values = self
613 .metadata
614 .iter()
615 .filter(|item| item.property == key)
616 .map(|item| item.value.clone())
617 .collect::<Vec<String>>();
618
619 (!values.is_empty()).then_some(values)
620 }
621
622 pub fn get_title(&self) -> Result<Vec<String>, EpubError> {
635 self.get_metadata_value("title")
636 .ok_or_else(|| EpubError::NonCanonicalFile {
637 tag: "title".to_string(),
638 })
639 }
640
641 pub fn get_language(&self) -> Result<Vec<String>, EpubError> {
655 self.get_metadata_value("language")
656 .ok_or_else(|| EpubError::NonCanonicalFile {
657 tag: "language".to_string(),
658 })
659 }
660
661 pub fn get_identifier(&self) -> Result<Vec<String>, EpubError> {
677 self.get_metadata_value("identifier")
678 .ok_or_else(|| EpubError::NonCanonicalFile {
679 tag: "identifier".to_string(),
680 })
681 }
682
683 pub fn get_manifest_item(&mut self, id: &str) -> Result<(Vec<u8>, String), EpubError> {
700 let resource_item = self
701 .manifest
702 .get(id)
703 .cloned()
704 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
705
706 let path = resource_item.path.to_str().unwrap();
707
708 let mut data = match self.archive.by_name(path) {
709 Ok(mut file) => {
710 let mut entry = Vec::<u8>::new();
711 file.read_to_end(&mut entry)?;
712
713 Ok(entry)
714 }
715 Err(ZipError::FileNotFound) => Err(EpubError::ResourceNotFound {
716 resource: path.to_string(),
717 }),
718 Err(err) => Err(EpubError::from(err)),
719 }?;
720
721 if let Some(method) = self.is_encryption_file(path) {
722 data = self.auto_dencrypt(&method, &mut data)?;
723 }
724
725 Ok((data, resource_item.mime))
726 }
727
728 pub fn get_manifest_item_by_path(
747 &mut self,
748 path: &str,
749 ) -> Result<(Vec<u8>, String), EpubError> {
750 let id = self
751 .manifest
752 .iter()
753 .find(|(_, item)| item.path.to_str().unwrap() == path)
754 .map(|(id, _)| id.to_string())
755 .ok_or_else(|| EpubError::ResourceNotFound {
756 resource: path.to_string(),
757 })?;
758
759 self.get_manifest_item(&id)
760 }
761
762 pub fn get_manifest_item_with_fallback(
778 &mut self,
779 id: &str,
780 supported_format: Vec<&str>,
781 ) -> Result<(Vec<u8>, String), EpubError> {
782 let mut manifest_item = self
783 .manifest
784 .get(id)
785 .cloned()
786 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
787
788 let mut current_manifest_id = id.to_string();
789 let mut fallback_chain = Vec::<String>::new();
790 'fallback: loop {
791 if supported_format.contains(&manifest_item.mime.as_str()) {
792 return self.get_manifest_item(¤t_manifest_id);
793 }
794
795 let fallback_id = manifest_item.fallback.clone();
796
797 match fallback_id {
798 None => break 'fallback,
800
801 Some(id) if fallback_chain.contains(&id) => break 'fallback,
803
804 Some(id) => {
805 fallback_chain.push(id.clone());
806
807 manifest_item = self
811 .manifest
812 .get(&manifest_item.fallback.unwrap())
813 .cloned()
814 .ok_or(EpubError::ResourceIdNotExist { id: id.clone() })?;
815 current_manifest_id = id;
816 }
817 };
818 }
819
820 Err(EpubError::NoSupportedFileFormat)
821 }
822
823 pub fn get_cover(&mut self) -> Option<(Vec<u8>, String)> {
840 self.manifest
841 .values()
842 .filter_map(|manifest| {
843 if manifest.id.to_ascii_lowercase().contains("cover") {
844 return Some(manifest.id.clone());
845 }
846
847 if let Some(properties) = &manifest.properties {
848 if properties.to_ascii_lowercase().contains("cover") {
849 return Some(manifest.id.clone());
850 }
851 }
852
853 None
854 })
855 .collect::<Vec<String>>()
856 .iter()
857 .find_map(|id| self.get_manifest_item(id).ok())
858 }
859
860 pub fn navigate_by_spine_index(&mut self, index: usize) -> Option<(Vec<u8>, String)> {
879 if index >= self.spine.len() {
880 return None;
881 }
882
883 let manifest_id = self.spine[index].idref.clone();
884 self.current_spine_index = index;
885 self.get_manifest_item(&manifest_id).ok()
886 }
887
888 pub fn spine_prev(&mut self) -> Option<(Vec<u8>, String)> {
900 if self.current_spine_index == 0 || !self.spine[self.current_spine_index].linear {
901 return None;
902 }
903
904 let prev_index = (0..self.current_spine_index)
905 .rev()
906 .find(|&index| self.spine[index].linear)?;
907
908 self.current_spine_index = prev_index;
909 let manifest_id = self.spine[prev_index].idref.clone();
910 self.get_manifest_item(&manifest_id).ok()
911 }
912
913 pub fn spine_next(&mut self) -> Option<(Vec<u8>, String)> {
925 if self.current_spine_index >= self.spine.len() - 1
926 || !self.spine[self.current_spine_index].linear
927 {
928 return None;
929 }
930
931 let next_index = (self.current_spine_index + 1..self.spine.len())
932 .find(|&index| self.spine[index].linear)?;
933
934 self.current_spine_index = next_index;
935 let manifest_id = self.spine[next_index].idref.clone();
936 self.get_manifest_item(&manifest_id).ok()
937 }
938
939 pub fn spine_current(&mut self) -> Option<(Vec<u8>, String)> {
949 let manifest_id = self.spine[self.current_spine_index].idref.clone();
950 self.get_manifest_item(&manifest_id).ok()
951 }
952
953 fn determine_epub_version(opf_element: &XmlElement) -> Result<EpubVersion, EpubError> {
963 if let Some(version) = opf_element.get_attr("version") {
965 match version.as_str() {
966 "2.0" => return Ok(EpubVersion::Version2_0),
967 "3.0" => return Ok(EpubVersion::Version3_0),
968 _ => {}
969 }
970 }
971
972 let spine_element = opf_element
973 .find_elements_by_name("spine")
974 .next()
975 .ok_or_else(|| EpubError::NonCanonicalFile {
976 tag: "spine".to_string(),
977 })?;
978
979 if spine_element.get_attr("toc").is_some() {
981 return Ok(EpubVersion::Version2_0);
982 }
983
984 let manifest_element = opf_element
985 .find_elements_by_name("manifest")
986 .next()
987 .ok_or_else(|| EpubError::NonCanonicalFile {
988 tag: "manifest".to_string(),
989 })?;
990
991 manifest_element
993 .children()
994 .find_map(|element| {
995 if let Some(id) = element.get_attr("id") {
996 if id.eq("nav") {
997 return Some(EpubVersion::Version3_0);
998 }
999 }
1000
1001 None
1002 })
1003 .ok_or(EpubError::UnrecognizedEpubVersion)
1004 }
1005
1006 #[inline]
1016 fn parse_dc_metadata(
1017 &mut self,
1018 element: &XmlElement,
1019 metadata: &mut Vec<MetadataItem>,
1020 ) -> Result<(), EpubError> {
1022 let id = element.get_attr("id");
1023 let lang = element.get_attr("lang");
1024 let property = element.name.clone();
1025 let value = element.text().normalize_whitespace();
1026
1027 let refined = match self.version {
1028 EpubVersion::Version2_0 => element
1031 .attributes
1032 .iter()
1033 .map(|(name, value)| {
1034 let property = name.to_string();
1035 let value = value.to_string().normalize_whitespace();
1036
1037 MetadataRefinement {
1038 refines: id.clone().unwrap(),
1039 property,
1040 value,
1041 lang: None,
1042 scheme: None,
1043 }
1044 })
1045 .collect(),
1046 EpubVersion::Version3_0 => vec![],
1047 };
1048
1049 metadata.push(MetadataItem {
1050 id,
1051 property,
1052 value,
1053 lang,
1054 refined,
1055 });
1056
1057 Ok(())
1058 }
1059
1060 #[inline]
1071 fn parse_opf_metadata(
1072 &mut self,
1073 element: &XmlElement,
1074 metadata: &mut Vec<MetadataItem>,
1075 metadata_link: &mut Vec<MetadataLinkItem>,
1076 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1077 ) -> Result<(), EpubError> {
1078 match element.name.as_str() {
1079 "meta" => self.parse_meta_element(element, metadata, refinements),
1080 "link" => self.parse_link_element(element, metadata_link),
1081 _ => Ok(()),
1082 }
1083 }
1084
1085 #[inline]
1086 fn parse_meta_element(
1087 &mut self,
1088 element: &XmlElement,
1089 metadata: &mut Vec<MetadataItem>,
1090 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1091 ) -> Result<(), EpubError> {
1092 match self.version {
1093 EpubVersion::Version2_0 => {
1094 let property =
1095 element
1096 .get_attr("name")
1097 .ok_or_else(|| EpubError::NonCanonicalFile {
1098 tag: element.tag_name(),
1099 })?;
1100 let value = element
1101 .get_attr("content")
1102 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1103 tag: element.tag_name(),
1104 attribute: "content".to_string(),
1105 })?
1106 .normalize_whitespace();
1107
1108 metadata.push(MetadataItem {
1109 id: None,
1110 property,
1111 value,
1112 lang: None,
1113 refined: vec![],
1114 });
1115 }
1116
1117 EpubVersion::Version3_0 => {
1118 let property = element.get_attr("property").ok_or_else(|| {
1119 EpubError::MissingRequiredAttribute {
1120 tag: element.tag_name(),
1121 attribute: "property".to_string(),
1122 }
1123 })?;
1124 let value = element.text().normalize_whitespace();
1125 let lang = element.get_attr("lang");
1126
1127 if let Some(refines) = element.get_attr("refines") {
1128 let id = refines.strip_prefix("#").unwrap_or(&refines).to_string();
1129 let scheme = element.get_attr("scheme");
1130 let refinement = MetadataRefinement {
1131 refines: id.clone(),
1132 property,
1133 value,
1134 lang,
1135 scheme,
1136 };
1137
1138 if let Some(refinements) = refinements.get_mut(&id) {
1139 refinements.push(refinement);
1140 } else {
1141 refinements.insert(id, vec![refinement]);
1142 }
1143 } else {
1144 let id = element.get_attr("id");
1145 let item = MetadataItem {
1146 id,
1147 property,
1148 value,
1149 lang,
1150 refined: vec![],
1151 };
1152
1153 metadata.push(item);
1154 };
1155 }
1156 }
1157 Ok(())
1158 }
1159
1160 #[inline]
1161 fn parse_link_element(
1162 &mut self,
1163 element: &XmlElement,
1164 metadata_link: &mut Vec<MetadataLinkItem>,
1165 ) -> Result<(), EpubError> {
1166 let href = element
1167 .get_attr("href")
1168 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1169 tag: element.tag_name(),
1170 attribute: "href".to_string(),
1171 })?;
1172 let rel = element
1173 .get_attr("rel")
1174 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1175 tag: element.tag_name(),
1176 attribute: "rel".to_string(),
1177 })?;
1178 let hreflang = element.get_attr("hreflang");
1179 let id = element.get_attr("id");
1180 let mime = element.get_attr("media-type");
1181 let properties = element.get_attr("properties");
1182
1183 metadata_link.push(MetadataLinkItem {
1184 href,
1185 rel,
1186 hreflang,
1187 id,
1188 mime,
1189 properties,
1190 refines: None,
1191 });
1192 Ok(())
1193 }
1194
1195 fn parse_nav_points(&self, parent_element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1201 let mut nav_points = Vec::new();
1202 for nav_point in parent_element.find_children_by_name("navPoint") {
1203 let label = match nav_point.find_children_by_name("navLabel").next() {
1204 Some(element) => element.text(),
1205 None => String::new(),
1206 };
1207
1208 let content = nav_point
1209 .find_children_by_name("content")
1210 .next()
1211 .map(|element| PathBuf::from(element.text()));
1212
1213 let play_order = nav_point
1214 .get_attr("playOrder")
1215 .and_then(|order| order.parse::<usize>().ok());
1216
1217 let children = self.parse_nav_points(nav_point)?;
1218
1219 nav_points.push(NavPoint {
1220 label,
1221 content,
1222 play_order,
1223 children,
1224 });
1225 }
1226
1227 nav_points.sort();
1228 Ok(nav_points)
1229 }
1230
1231 fn parse_catalog_list(&self, element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1237 let mut catalog = Vec::new();
1238 for item in element.children() {
1239 if item.tag_name() != "li" {
1240 return Err(EpubError::NonCanonicalFile {
1241 tag: "li".to_string(),
1242 });
1243 }
1244
1245 let title_element = item
1246 .find_children_by_names(&["span", "a"])
1247 .next()
1248 .ok_or_else(|| EpubError::NonCanonicalFile {
1249 tag: "span/a".to_string(),
1250 })?;
1251 let content_href = title_element.get_attr("href").map(PathBuf::from);
1252 let sub_list = if let Some(list) = item.find_children_by_name("ol").next() {
1253 self.parse_catalog_list(list)?
1254 } else {
1255 vec![]
1256 };
1257
1258 catalog.push(NavPoint {
1259 label: title_element.text(),
1260 content: content_href,
1261 children: sub_list,
1262 play_order: None,
1263 });
1264 }
1265
1266 Ok(catalog)
1267 }
1268
1269 #[inline]
1286 fn normalize_manifest_path(&self, path: &str) -> Result<PathBuf, EpubError> {
1287 let mut path = if path.starts_with("../") {
1288 let mut current_dir = self.epub_path.join(&self.package_path);
1289 current_dir.pop();
1290
1291 check_realtive_link_leakage(self.epub_path.clone(), current_dir, path)
1292 .map(PathBuf::from)
1293 .ok_or_else(|| EpubError::RealtiveLinkLeakage {
1294 path: path.to_string(),
1295 })?
1296 } else if let Some(path) = path.strip_prefix("/") {
1297 PathBuf::from(path.to_string())
1298 } else {
1299 self.base_path.join(path)
1300 };
1301
1302 #[cfg(windows)]
1303 {
1304 path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
1305 }
1306
1307 Ok(path)
1308 }
1309
1310 fn validate_fallback_chains(&self) {
1321 for (id, item) in &self.manifest {
1322 if item.fallback.is_none() {
1323 continue;
1324 }
1325
1326 let mut fallback_chain = Vec::new();
1327 if let Err(msg) = self.validate_fallback_chain(id, &mut fallback_chain) {
1328 warn!("Invalid fallback chain for item {}: {}", id, msg);
1329 }
1330 }
1331 }
1332
1333 fn validate_fallback_chain(
1347 &self,
1348 manifest_id: &str,
1349 fallback_chain: &mut Vec<String>,
1350 ) -> Result<(), String> {
1351 if fallback_chain.contains(&manifest_id.to_string()) {
1352 fallback_chain.push(manifest_id.to_string());
1353
1354 return Err(format!(
1355 "Circular reference detected in fallback chain for {}",
1356 fallback_chain.join("->")
1357 ));
1358 }
1359
1360 let item = self.manifest.get(manifest_id).unwrap();
1362
1363 if let Some(fallback_id) = &item.fallback {
1364 if !self.manifest.contains_key(fallback_id) {
1365 return Err(format!(
1366 "Fallback resource {} does not exist in manifest",
1367 fallback_id
1368 ));
1369 }
1370
1371 fallback_chain.push(manifest_id.to_string());
1372 self.validate_fallback_chain(fallback_id, fallback_chain)
1373 } else {
1374 Ok(())
1376 }
1377 }
1378
1379 fn is_encryption_file(&self, path: &str) -> Option<String> {
1392 self.encryption.as_ref().and_then(|encryptions| {
1393 encryptions
1394 .iter()
1395 .find(|encryption| encryption.data == path)
1396 .map(|encryption| encryption.method.clone())
1397 })
1398 }
1399
1400 #[inline]
1418 fn auto_dencrypt(&self, method: &str, data: &mut [u8]) -> Result<Vec<u8>, EpubError> {
1419 match method {
1420 "http://www.idpf.org/2008/embedding" => {
1421 Ok(idpf_font_dencryption(data, &self.unique_identifier))
1422 }
1423 "http://ns.adobe.com/pdf/enc#RC" => {
1424 Ok(adobe_font_dencryption(data, &self.unique_identifier))
1425 }
1426 _ => Err(EpubError::UnsupportedEncryptedMethod {
1427 method: method.to_string(),
1428 }),
1429 }
1430 }
1431}
1432
1433impl EpubDoc<BufReader<File>> {
1434 pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
1446 let file = File::open(&path).map_err(EpubError::from)?;
1447 let path = canonicalize(path)?;
1448
1449 Self::from_reader(BufReader::new(file), path)
1450 }
1451}
1452
1453#[cfg(test)]
1454mod tests {
1455 use std::{
1456 fs::File,
1457 io::BufReader,
1458 path::{Path, PathBuf},
1459 };
1460
1461 use crate::{epub::EpubDoc, error::EpubError, utils::XmlReader};
1462
1463 mod package_documents_tests {
1465 use std::path::Path;
1466
1467 use crate::epub::{EpubDoc, EpubVersion};
1468
/// The `pkg-collections-unknown` fixture must open without error.
#[test]
fn test_pkg_collections_unknown() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-collections-unknown.epub"));
    assert!(opened.is_ok());
}
1478
/// Creators must be reported in the order expected for this fixture.
#[test]
fn test_pkg_creator_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-creator-order.epub"));
    assert!(opened.is_ok());

    let creators = opened.unwrap().get_metadata_value("creator");
    assert!(creators.is_some());

    let creators = creators.unwrap();
    let expected = vec![
        "Dave Cramer",
        "Wendy Reid",
        "Dan Lazin",
        "Ivan Herman",
        "Brady Duga",
    ];
    assert_eq!(creators.len(), 5);
    assert_eq!(creators, expected);
}
1505
/// The manifest holds exactly the two expected items, and both can be read.
#[test]
fn test_pkg_manifest_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-manifest-unknown.epub"));
    assert!(opened.is_ok());

    let mut doc = opened.unwrap();
    assert_eq!(doc.manifest.len(), 2);

    for present in ["nav", "content_001"] {
        assert!(doc.get_manifest_item(present).is_ok());
    }
    assert!(doc.get_manifest_item("content_002").is_err());
}
1521
/// Unknown `meta` properties are kept, and absent ones are reported as `None`.
#[test]
fn test_pkg_meta_unknown() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-meta-unknown.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let referenced_by = doc.get_metadata_value("dcterms:isReferencedBy");
    assert!(referenced_by.is_some());
    let referenced_by = referenced_by.unwrap();
    assert_eq!(referenced_by.len(), 1);
    assert_eq!(
        referenced_by,
        vec!["https://www.w3.org/TR/epub-rs/#confreq-rs-pkg-meta-unknown"]
    );

    let modified = doc.get_metadata_value("dcterms:modified");
    assert!(modified.is_some());
    let modified = modified.unwrap();
    assert_eq!(modified.len(), 1);
    assert_eq!(modified, vec!["2021-01-11T00:00:00Z"]);

    assert!(doc.get_metadata_value("dcterms:title").is_none());
}
1550
/// Metadata values must have their whitespace normalized.
#[test]
fn test_pkg_meta_white_space() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-meta-whitespace.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let creator = doc.get_metadata_value("creator");
    assert!(creator.is_some());
    let creator = creator.unwrap();
    assert_eq!(creator.len(), 1);
    assert_eq!(creator, vec!["Dave Cramer"]);

    let description = doc.get_metadata_value("description");
    assert!(description.is_some());
    let description = description.unwrap();
    assert_eq!(description.len(), 1);
    assert_eq!(
        description,
        vec![
            "The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases."
        ]
    );
}
1578
/// Each spine position must resolve to the expected manifest resource, even
/// when the same item appears several times.
#[test]
fn test_pkg_spine_duplicate_item_hyperlink() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pkg-spine-duplicate-item-hyperlink.epub",
    ));
    assert!(opened.is_ok());

    let mut doc = opened.unwrap();
    assert_eq!(doc.spine.len(), 4);

    let expected = ["content_001", "content_002", "content_002", "content_002"];
    for (index, manifest_id) in expected.iter().copied().enumerate() {
        let via_spine = doc.navigate_by_spine_index(index).unwrap();
        let via_manifest = doc.get_manifest_item(manifest_id).unwrap();
        assert_eq!(via_spine, via_manifest);
    }
}
1607
/// Stepping through a spine with duplicated items stops at both ends.
#[test]
fn test_pkg_spine_duplicate_item_rendering() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pkg-spine-duplicate-item-rendering.epub",
    ));
    assert!(opened.is_ok());

    let mut doc = opened.unwrap();
    assert_eq!(doc.spine.len(), 4);

    // Already at the first item: there is nothing before it.
    assert!(doc.spine_prev().is_none());

    // First step forward succeeds.
    assert!(doc.spine_next().is_some());

    // Two more steps reach the last item; the step after that must fail.
    doc.spine_next();
    doc.spine_next();
    assert!(doc.spine_next().is_none());
}
1631
/// In this fixture neither the initial position nor spine index 1 can be left
/// with `spine_prev` or `spine_next`.
#[test]
fn test_pkg_spine_nonlinear_activation() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-nonlinear-activation.epub"));
    assert!(opened.is_ok());
    let mut doc = opened.unwrap();

    assert!(doc.spine_prev().is_none());
    assert!(doc.spine_next().is_none());

    let jumped = doc.navigate_by_spine_index(1);
    assert!(jumped.is_some());

    assert!(doc.spine_prev().is_none());
    assert!(doc.spine_next().is_none());
}
1649
/// Spine items must keep the order expected for this fixture.
#[test]
fn test_pkg_spine_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-order.epub"));
    assert!(opened.is_ok());

    let doc = opened.unwrap();
    assert_eq!(doc.spine.len(), 4);

    let mut idrefs = Vec::<String>::new();
    for item in &doc.spine {
        idrefs.push(item.idref.clone());
    }

    assert_eq!(
        idrefs,
        vec![
            "d-content_001",
            "c-content_002",
            "b-content_003",
            "a-content_004",
        ]
    );
}
1674
/// Walking the spine forward yields, at every step, the resource of the item
/// the walk has arrived at, and ends on the last item.
#[test]
fn test_spine_order_svg() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-order-svg.epub"));
    assert!(opened.is_ok());

    let mut doc = opened.unwrap();
    assert_eq!(doc.spine.len(), 4);

    while let Some(stepped) = doc.spine_next() {
        let idref = doc.spine[doc.current_spine_index].idref.clone();

        let fetched = doc.get_manifest_item(&idref);
        assert!(fetched.is_ok());
        assert_eq!(stepped, fetched.unwrap());
    }

    assert_eq!(doc.current_spine_index, 3);
}
1702
/// Checks the single spine entry parsed from `pkg-spine-unknown.epub`.
///
/// The entry must reference `content_001`, carry no `id`, be linear, and keep
/// the `properties` value `untrustworthy`.
#[test]
fn test_pkg_spine_unknown() {
    let epub_file = Path::new("./test_case/pkg-spine-unknown.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    let doc = doc.unwrap();
    assert_eq!(doc.spine.len(), 1);

    let item = &doc.spine[0];
    assert_eq!(item.idref, "content_001");
    assert_eq!(item.id, None);
    // A boolean is asserted directly rather than compared against `true`.
    assert!(item.linear);
    assert_eq!(item.properties, Some("untrustworthy".to_string()));
}
1719
/// Checks that `get_title` on `pkg-title-order.epub` returns six titles in the
/// expected order, with `pkg-title-order` first.
#[test]
fn test_pkg_title_order() {
    let path = Path::new("./test_case/pkg-title-order.epub");
    let opened = EpubDoc::new(path);
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let fetched = book.get_title();
    assert!(fetched.is_ok());

    let titles = fetched.unwrap();
    assert_eq!(titles.len(), 6);

    let expected = vec![
        "pkg-title-order",
        "This title must not display first",
        "Also, this title must not display first",
        "This title also must not display first",
        "This title must also not display first",
        "This title must not display first, also",
    ];
    assert_eq!(titles, expected);
}
1747
/// Opens `pkg-unique-id.epub` and `pkg-unique-id_duplicate.epub` and checks
/// that both report the same identifier and the same `unique_identifier`
/// value, `pkg-unique-id`.
#[test]
fn test_pkg_unique_id() {
    let first = EpubDoc::new(Path::new("./test_case/pkg-unique-id.epub"));
    assert!(first.is_ok());

    let second = EpubDoc::new(Path::new("./test_case/pkg-unique-id_duplicate.epub"));
    assert!(second.is_ok());

    let first = first.unwrap();
    let second = second.unwrap();

    // The identifiers reported by the two publications must match.
    let first_identifier = first.get_identifier().unwrap();
    let second_identifier = second.get_identifier().unwrap();
    assert_eq!(first_identifier, second_identifier);

    assert_eq!(first.unique_identifier, "pkg-unique-id");
    assert_eq!(second.unique_identifier, "pkg-unique-id");
}
1771
/// Checks that `pkg-version-backward.epub` opens successfully and is reported
/// as `EpubVersion::Version3_0`.
#[test]
fn test_pkg_version_backward() {
    let path = Path::new("./test_case/pkg-version-backward.epub");
    let opened = EpubDoc::new(path);
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.version, EpubVersion::Version3_0);
}
1784
/// Checks the metadata links parsed from `pkg-linked-records.epub`.
///
/// Three links must be present, and at least one of them must carry a
/// `properties` value equal to `onix`.
#[test]
fn test_pkg_linked_records() {
    let epub_file = Path::new("./test_case/pkg-linked-records.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    let doc = doc.unwrap();
    assert_eq!(doc.metadata_link.len(), 3);

    // Links without `properties` never match; the others are compared against "onix".
    let has_onix = doc.metadata_link.iter().any(|link| {
        link.properties
            .as_ref()
            .is_some_and(|properties| properties.eq("onix"))
    });
    assert!(has_onix);
}
1806
/// Checks path-based lookups on `pkg-manifest-unlisted-resource.epub`.
///
/// `EPUB/content_001.xhtml` must resolve, while `EPUB/red.png` must fail with
/// the "Resource not found" message.
#[test]
fn test_pkg_manifest_unlisted_resource() {
    let path = Path::new("./test_case/pkg-manifest-unlisted-resource.epub");
    let opened = EpubDoc::new(path);
    assert!(opened.is_ok());
    let mut book = opened.unwrap();

    let listed = book.get_manifest_item_by_path("EPUB/content_001.xhtml");
    assert!(listed.is_ok());

    // The lookup is performed twice: once for the status, once for the message.
    let unlisted = "EPUB/red.png";
    assert!(book.get_manifest_item_by_path(unlisted).is_err());

    let failure = book.get_manifest_item_by_path(unlisted).unwrap_err();
    assert_eq!(
        failure.to_string(),
        "Resource not found: Unable to find resource from \"EPUB/red.png\"."
    );
}
1829 }
1830
1831 mod manifest_fallbacks_tests {
1835 use std::path::Path;
1836
1837 use crate::epub::EpubDoc;
1838
1839 #[test]
1843 fn test_pub_foreign_bad_fallback() {
1844 let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
1845 let doc = EpubDoc::new(epub_file);
1846 assert!(doc.is_ok());
1847
1848 let mut doc = doc.unwrap();
1849 assert!(doc.get_manifest_item("content_001").is_ok());
1850 assert!(doc.get_manifest_item("bar").is_ok());
1851
1852 assert_eq!(
1853 doc.get_manifest_item_with_fallback("content_001", vec!["application/xhtml+xml"])
1854 .unwrap_err()
1855 .to_string(),
1856 "No supported file format: The fallback resource does not contain the file format you support."
1857 );
1858 }
1859
1860 #[test]
1864 fn test_pub_foreign_image() {
1865 let epub_file = Path::new("./test_case/pub-foreign_image.epub");
1866 let doc = EpubDoc::new(epub_file);
1867 assert!(doc.is_ok());
1868
1869 let mut doc = doc.unwrap();
1870 let result = doc.get_manifest_item_with_fallback(
1871 "image-tiff",
1872 vec!["image/png", "application/xhtml+xml"],
1873 );
1874 assert!(result.is_ok());
1875
1876 let (_, mime) = result.unwrap();
1877 assert_eq!(mime, "image/png");
1878 }
1879
1880 #[test]
1884 fn test_pub_foreign_json_spine() {
1885 let epub_file = Path::new("./test_case/pub-foreign_json-spine.epub");
1886 let doc = EpubDoc::new(epub_file);
1887 assert!(doc.is_ok());
1888
1889 let mut doc = doc.unwrap();
1890 let result = doc.get_manifest_item_with_fallback(
1891 "content_primary",
1892 vec!["application/xhtml+xml", "application/json"],
1893 );
1894 assert!(result.is_ok());
1895 let (_, mime) = result.unwrap();
1896 assert_eq!(mime, "application/json");
1897
1898 let result = doc
1899 .get_manifest_item_with_fallback("content_primary", vec!["application/xhtml+xml"]);
1900 assert!(result.is_ok());
1901 let (_, mime) = result.unwrap();
1902 assert_eq!(mime, "application/xhtml+xml");
1903 }
1904
1905 #[test]
1909 fn test_pub_foreign_xml_spine() {
1910 let epub_file = Path::new("./test_case/pub-foreign_xml-spine.epub");
1911 let doc = EpubDoc::new(epub_file);
1912 assert!(doc.is_ok());
1913
1914 let mut doc = doc.unwrap();
1915 let result = doc.get_manifest_item_with_fallback(
1916 "content_primary",
1917 vec!["application/xhtml+xml", "application/xml"],
1918 );
1919 assert!(result.is_ok());
1920 let (_, mime) = result.unwrap();
1921 assert_eq!(mime, "application/xml");
1922
1923 let result = doc
1924 .get_manifest_item_with_fallback("content_primary", vec!["application/xhtml+xml"]);
1925 assert!(result.is_ok());
1926 let (_, mime) = result.unwrap();
1927 assert_eq!(mime, "application/xhtml+xml");
1928 }
1929
1930 #[test]
1934 fn test_pub_foreign_xml_suffix_spine() {
1935 let epub_file = Path::new("./test_case/pub-foreign_xml-suffix-spine.epub");
1936 let doc = EpubDoc::new(epub_file);
1937 assert!(doc.is_ok());
1938
1939 let mut doc = doc.unwrap();
1940 let result = doc.get_manifest_item_with_fallback(
1941 "content_primary",
1942 vec!["application/xhtml+xml", "application/dtc+xml"],
1943 );
1944 assert!(result.is_ok());
1945 let (_, mime) = result.unwrap();
1946 assert_eq!(mime, "application/dtc+xml");
1947
1948 let result = doc
1949 .get_manifest_item_with_fallback("content_primary", vec!["application/xhtml+xml"]);
1950 assert!(result.is_ok());
1951 let (_, mime) = result.unwrap();
1952 assert_eq!(mime, "application/xhtml+xml");
1953 }
1954 }
1955
1956 mod open_container_format_tests {
1958 use std::{cmp::min, io::Read, path::Path};
1959
1960 use sha1::{Digest, Sha1};
1961
1962 use crate::epub::EpubDoc;
1963
1964 #[test]
1968 fn test_ocf_metainf_inc() {
1969 let epub_file = Path::new("./test_case/ocf-metainf-inc.epub");
1970 let doc = EpubDoc::new(epub_file);
1971 assert!(doc.is_ok());
1972 }
1973
1974 #[test]
1978 fn test_ocf_metainf_manifest() {
1979 let epub_file = Path::new("./test_case/ocf-metainf-manifest.epub");
1980 let doc = EpubDoc::new(epub_file);
1981 assert!(doc.is_ok());
1982 }
1983
1984 #[test]
1988 fn test_ocf_package_arbitrary() {
1989 let epub_file = Path::new("./test_case/ocf-package_arbitrary.epub");
1990 let doc = EpubDoc::new(epub_file);
1991 assert!(doc.is_ok());
1992
1993 let doc = doc.unwrap();
1994 assert_eq!(doc.package_path, Path::new("FOO/BAR/package.opf"));
1995 }
1996
1997 #[test]
2001 fn test_ocf_package_multiple() {
2002 let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
2003 let doc = EpubDoc::new(epub_file);
2004 assert!(doc.is_ok());
2005
2006 let doc = doc.unwrap();
2007 assert_eq!(doc.package_path, Path::new("FOO/BAR/package.opf"));
2008 assert_eq!(doc.base_path, Path::new("FOO/BAR"));
2009 }
2010
2011 #[test]
2015 fn test_ocf_url_link_leaking_relative() {
2016 let epub_file = Path::new("./test_case/ocf-url_link-leaking-relative.epub");
2017 let doc = EpubDoc::new(epub_file);
2018 assert!(doc.is_err());
2019 assert_eq!(
2020 doc.err().unwrap().to_string(),
2021 String::from(
2022 "Relative link leakage: Path \"../../../../media/imgs/monastery.jpg\" is out of container range."
2023 )
2024 )
2025 }
2026
2027 #[test]
2031 fn test_ocf_url_link_path_absolute() {
2032 let epub_file = Path::new("./test_case/ocf-url_link-path-absolute.epub");
2033 let doc = EpubDoc::new(epub_file);
2034 assert!(doc.is_ok());
2035
2036 let doc = doc.unwrap();
2037 let resource = doc.manifest.get("photo").unwrap();
2038 assert_eq!(resource.path, Path::new("media/imgs/monastery.jpg"));
2039 }
2040
2041 #[test]
2045 fn test_ocf_url_link_relative() {
2046 let epub_file = Path::new("./test_case/ocf-url_link-relative.epub");
2047 let doc = EpubDoc::new(epub_file);
2048 assert!(doc.is_ok());
2049
2050 let doc = doc.unwrap();
2051 let resource = doc.manifest.get("photo").unwrap();
2052 assert_eq!(resource.path, Path::new("media/imgs/monastery.jpg"));
2053 }
2054
2055 #[test]
2059 fn test_ocf_url_manifest() {
2060 let epub_file = Path::new("./test_case/ocf-url_manifest.epub");
2061 let doc = EpubDoc::new(epub_file);
2062 assert!(doc.is_ok());
2063
2064 let mut doc = doc.unwrap();
2065 assert!(doc.get_manifest_item("nav").is_ok());
2066 assert!(doc.get_manifest_item("content_001").is_ok());
2067 assert!(doc.get_manifest_item("content_002").is_err());
2068 }
2069
2070 #[test]
2074 fn test_ocf_url_relative() {
2075 let epub_file = Path::new("./test_case/ocf-url_relative.epub");
2076 let doc = EpubDoc::new(epub_file);
2077 assert!(doc.is_ok());
2078
2079 let mut doc = doc.unwrap();
2080 assert_eq!(doc.package_path, Path::new("foo/BAR/baz.opf"));
2081 assert_eq!(doc.base_path, Path::new("foo/BAR"));
2082 assert_eq!(
2083 doc.manifest.get("nav").unwrap().path,
2084 Path::new("foo/BAR/nav.xhtml")
2085 );
2086 assert_eq!(
2087 doc.manifest.get("content_001").unwrap().path,
2088 Path::new("foo/BAR/qux/content_001.xhtml")
2089 );
2090 assert!(doc.get_manifest_item("nav").is_ok());
2091 assert!(doc.get_manifest_item("content_001").is_ok());
2092 }
2093
2094 #[test]
2099 fn test_ocf_zip_comp() {
2100 let epub_file = Path::new("./test_case/ocf-zip-comp.epub");
2101 let doc = EpubDoc::new(epub_file);
2102 assert!(doc.is_ok());
2103 }
2104
2105 #[test]
2110 fn test_ocf_zip_mult() {
2111 let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2112 let doc = EpubDoc::new(epub_file);
2113 assert!(doc.is_ok());
2114 }
2115
2116 #[test]
2120 fn test_ocf_font_obfuscation() {
2121 let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2122 let doc = EpubDoc::new(epub_file);
2123 assert!(doc.is_ok());
2124
2125 let mut doc = doc.unwrap();
2126 let unique_id = doc.unique_identifier.clone();
2127
2128 let mut hasher = Sha1::new();
2129 hasher.update(unique_id.as_bytes());
2130 let hash = hasher.finalize();
2131 let mut key = vec![0u8; 1040];
2132 for i in 0..1040 {
2133 key[i] = hash[i % hash.len()];
2134 }
2135
2136 assert!(doc.encryption.is_some());
2137 assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2138
2139 let data = &doc.encryption.unwrap()[0];
2140 assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2141
2142 let font_file = doc
2143 .archive
2144 .by_name(&data.data)
2145 .unwrap()
2146 .bytes()
2147 .collect::<Result<Vec<u8>, _>>();
2148 assert!(font_file.is_ok());
2149 let font_file = font_file.unwrap();
2150
2151 let mut deobfuscated = font_file.clone();
2153 for i in 0..min(1040, deobfuscated.len()) {
2154 deobfuscated[i] ^= key[i];
2155 }
2156
2157 assert!(is_valid_font(&deobfuscated));
2158 }
2159
2160 #[test]
2164 fn test_ocf_font_obfuscation_bis() {
2165 let epub_file = Path::new("./test_case/ocf-font_obfuscation_bis.epub");
2166 let doc = EpubDoc::new(epub_file);
2167 assert!(doc.is_ok());
2168
2169 let mut doc = doc.unwrap();
2170
2171 let wrong_unique_id = "wrong-publication-id";
2172 let mut hasher = Sha1::new();
2173 hasher.update(wrong_unique_id.as_bytes());
2174 let hash = hasher.finalize();
2175 let mut wrong_key = vec![0u8; 1040];
2176 for i in 0..1040 {
2177 wrong_key[i] = hash[i % hash.len()];
2178 }
2179
2180 assert!(doc.encryption.is_some());
2181 assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2182
2183 let data = &doc.encryption.unwrap()[0];
2184 assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2185
2186 let font_file = doc
2187 .archive
2188 .by_name(&data.data)
2189 .unwrap()
2190 .bytes()
2191 .collect::<Result<Vec<u8>, _>>();
2192 assert!(font_file.is_ok());
2193 let font_file = font_file.unwrap();
2194
2195 let mut deobfuscated_with_wrong_key = font_file.clone();
2197 for i in 0..std::cmp::min(1040, deobfuscated_with_wrong_key.len()) {
2198 deobfuscated_with_wrong_key[i] ^= wrong_key[i];
2199 }
2200
2201 assert!(!is_valid_font(&deobfuscated_with_wrong_key));
2202 }
2203
2204 fn is_valid_font(data: &[u8]) -> bool {
2205 if data.len() < 4 {
2206 return false;
2207 }
2208 let sig = &data[0..4];
2209 sig == b"OTTO"
2212 || sig == b"\x00\x01\x00\x00"
2213 || sig == b"\x00\x02\x00\x00"
2214 || sig == b"true"
2215 || sig == b"typ1"
2216 }
2217 }
2218
/// Exercises `parse_container` with three hand-written container documents:
/// one without any `rootfile`, one whose `rootfile` lacks `full-path`, and one
/// complete document that must yield `EPUB/content.opf`.
#[test]
fn test_parse_container() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-zip-mult.epub"));
    assert!(opened.is_ok());

    // Every case feeds an owned copy of the XML to the same associated function.
    let parse = |xml: &str| EpubDoc::<BufReader<File>>::parse_container(xml.to_string());

    // Case 1: `rootfiles` is present but empty.
    let without_rootfile = parse(
        r#"
        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
            <rootfiles></rootfiles>
        </container>
        "#,
    );
    assert!(without_rootfile.is_err());
    assert_eq!(
        without_rootfile.unwrap_err(),
        EpubError::NonCanonicalFile {
            tag: "rootfile".to_string()
        }
    );

    // Case 2: the `rootfile` element has no `full-path` attribute.
    let without_full_path = parse(
        r#"
        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
            <rootfiles>
                <rootfile media-type="application/oebps-package+xml"/>
            </rootfiles>
        </container>
        "#,
    );
    assert!(without_full_path.is_err());
    assert_eq!(
        without_full_path.unwrap_err(),
        EpubError::MissingRequiredAttribute {
            tag: "rootfile".to_string(),
            attribute: "full-path".to_string(),
        }
    );

    // Case 3: a complete document resolves to the declared package path.
    let complete = parse(
        r#"
        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
            <rootfiles>
                <rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
            </rootfiles>
        </container>
        "#,
    );
    assert!(complete.is_ok());
    assert_eq!(complete.unwrap(), PathBuf::from("EPUB/content.opf"))
}
2274
/// Exercises `parse_manifest` with manifests whose items lack `id`, `href` or
/// `media-type`, each of which must be reported as a missing required
/// attribute on `item`, followed by a complete manifest that must parse.
#[test]
fn test_parse_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_multiple.epub"));
    assert!(opened.is_ok());
    let mut book = opened.unwrap();

    // Each entry pairs a defective manifest with the attribute it is missing.
    let failing_cases = [
        (
            r#"
            <manifest>
                <item href="content_001.xhtml" media-type="application/xhtml+xml"/>
                <item properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
            </manifest>
            "#,
            "id",
        ),
        (
            r#"
            <manifest>
                <item id="content_001" media-type="application/xhtml+xml"/>
                <item id="nav" properties="nav" media-type="application/xhtml+xml"/>
            </manifest>
            "#,
            "href",
        ),
        (
            r#"
            <manifest>
                <item id="content_001" href="content_001.xhtml"/>
                <item id="nav" properties="nav" href="nav.xhtml"/>
            </manifest>
            "#,
            "media-type",
        ),
    ];

    for (xml, missing_attribute) in failing_cases {
        let parsed = XmlReader::parse(xml);
        assert!(parsed.is_ok());

        let outcome = book.parse_manifest(&parsed.unwrap());
        assert!(outcome.is_err());
        assert_eq!(
            outcome.unwrap_err(),
            EpubError::MissingRequiredAttribute {
                tag: "item".to_string(),
                attribute: missing_attribute.to_string(),
            },
        );
    }

    // A manifest carrying all three attributes on every item is accepted.
    let complete = r#"
        <manifest>
            <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
            <item id="nav" properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
        </manifest>
        "#;
    let parsed = XmlReader::parse(complete);
    assert!(parsed.is_ok());

    let outcome = book.parse_manifest(&parsed.unwrap());
    assert!(outcome.is_ok());
}
2355
/// `has_encryption` must report `true` for `ocf-font_obfuscation.epub`.
#[test]
fn test_fn_has_encryption() {
    let path = Path::new("./test_case/ocf-font_obfuscation.epub");
    let opened = EpubDoc::new(path);
    assert!(opened.is_ok());

    let mut book = opened.unwrap();
    let encrypted = book.has_encryption();
    assert!(encrypted);
}
2366
/// Checks the encryption data parsed from `ocf-font_obfuscation.epub`: a
/// single entry using the `http://www.idpf.org/2008/embedding` method for
/// `EPUB/fonts/Lobster.ttf`.
#[test]
fn test_fn_parse_encryption() {
    let path = Path::new("./test_case/ocf-font_obfuscation.epub");
    let opened = EpubDoc::new(path);
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert!(book.encryption.is_some());

    let entries = book.encryption.unwrap();
    assert_eq!(entries.len(), 1);

    let entry = &entries[0];
    assert_eq!(entry.method, "http://www.idpf.org/2008/embedding");
    assert_eq!(entry.data, "EPUB/fonts/Lobster.ttf");
}
2382
/// Checks `get_metadata` for keys present in `epub-33.epub`.
///
/// `title` and `language` must each return exactly one item with the expected
/// property and value, and `get_language` must return `["en-us"]`.
#[test]
fn test_get_metadata_existing_key() {
    let path = Path::new("./test_case/epub-33.epub");
    let opened = EpubDoc::new(path);
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // Title metadata.
    let title_items = book.get_metadata("title");
    assert!(title_items.is_some());
    let title_items = title_items.unwrap();
    assert_eq!(title_items.len(), 1);
    assert_eq!(title_items[0].property, "title");
    assert_eq!(title_items[0].value, "EPUB 3.3");

    // Language metadata.
    let language_items = book.get_metadata("language");
    assert!(language_items.is_some());
    let language_items = language_items.unwrap();
    assert_eq!(language_items.len(), 1);
    assert_eq!(language_items[0].property, "language");
    assert_eq!(language_items[0].value, "en-us");

    // The dedicated accessor must agree with the raw metadata.
    let languages = book.get_language();
    assert!(languages.is_ok());
    assert_eq!(languages.unwrap(), vec!["en-us"]);
}
2411
/// `get_metadata` must return `None` for a key that `epub-33.epub` does not
/// contain.
#[test]
fn test_get_metadata_nonexistent_key() {
    let path = Path::new("./test_case/epub-33.epub");
    let opened = EpubDoc::new(path);
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert!(book.get_metadata("nonexistent").is_none());
}
2422
/// `get_metadata("creator")` on `epub-33.epub` must return three items, in
/// order, each with the expected `id`, the property `creator`, and the
/// expected value.
#[test]
fn test_get_metadata_multiple_items_same_type() {
    let path = Path::new("./test_case/epub-33.epub");
    let opened = EpubDoc::new(path);
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let found = book.get_metadata("creator");
    assert!(found.is_some());

    let creators = found.unwrap();
    assert_eq!(creators.len(), 3);

    // Expected (id, value) pairs, listed in the order the items must appear.
    let expected = [
        ("creator_id_0", "Matt Garrish, DAISY Consortium"),
        ("creator_id_1", "Ivan Herman, W3C"),
        ("creator_id_2", "Dave Cramer, Invited Expert"),
    ];

    for (creator, (id, value)) in creators.iter().zip(expected) {
        assert_eq!(creator.id, Some(id.to_string()));
        assert_eq!(creator.property, "creator");
        assert_eq!(creator.value, value);
    }
}
2449
/// The single `title` item of `epub-33.epub` must carry exactly one
/// refinement, with property `title-type` and value `main`.
#[test]
fn test_get_metadata_with_refinement() {
    let path = Path::new("./test_case/epub-33.epub");
    let opened = EpubDoc::new(path);
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let found = book.get_metadata("title");
    assert!(found.is_some());

    let titles = found.unwrap();
    assert_eq!(titles.len(), 1);

    let refinements = &titles[0].refined;
    assert_eq!(refinements.len(), 1);
    assert_eq!(refinements[0].property, "title-type");
    assert_eq!(refinements[0].value, "main");
}
2467
/// Checks `get_manifest_item_with_fallback` on `pub-foreign_bad-fallback.epub`.
///
/// With `image/psd` supported, `content_001` must resolve and be reported as
/// `image/psd`. With only `application/xhtml+xml` supported, the call must
/// fail with the "No supported file format" message.
#[test]
fn test_get_manifest_item_with_fallback() {
    let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    let mut doc = doc.unwrap();
    assert!(doc.get_manifest_item("content_001").is_ok());
    assert!(doc.get_manifest_item("bar").is_ok());

    // `expect` fails the test with the underlying error attached, instead of
    // asserting on a constant `false`.
    let (_, mime) = doc
        .get_manifest_item_with_fallback("content_001", vec!["image/psd"])
        .expect("get_manifest_item_with_fallback failed");
    assert_eq!(mime, "image/psd");

    assert_eq!(
        doc.get_manifest_item_with_fallback("content_001", vec!["application/xhtml+xml"])
            .unwrap_err()
            .to_string(),
        "No supported file format: The fallback resource does not contain the file format you support."
    );
}
2494
/// `get_cover` on `pkg-cover-image.epub` must return 5785 bytes of data
/// reported as `image/jpeg`.
///
/// If the document fails to open, the error is printed before the assertion
/// fails.
#[test]
fn test_get_cover() {
    let path = Path::new("./test_case/pkg-cover-image.epub");
    let opened = EpubDoc::new(path);

    // Surface the reason for a failed open in the test output.
    if let Some(err) = opened.as_ref().err() {
        println!("{}", err);
    }
    assert!(opened.is_ok());

    let mut book = opened.unwrap();
    let cover = book.get_cover();
    assert!(cover.is_some());

    let (bytes, mime) = cover.unwrap();
    assert_eq!(bytes.len(), 5785);
    assert_eq!(mime, "image/jpeg");
}
2512
/// `epub-2.epub` must open, and `get_title` must return the single title
/// `Minimal EPUB 2.0`.
#[test]
fn test_epub_2() {
    let path = Path::new("./test_case/epub-2.epub");
    let opened = EpubDoc::new(path);
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let fetched = book.get_title();
    assert!(fetched.is_ok());

    let titles = fetched.unwrap();
    assert_eq!(titles, vec!["Minimal EPUB 2.0"]);
}
2525}