1use std::{
24 collections::HashMap,
25 fs::{File, canonicalize},
26 io::{BufReader, Read, Seek},
27 path::{Path, PathBuf},
28 sync::{
29 Arc, Mutex,
30 atomic::{AtomicUsize, Ordering},
31 },
32};
33
34use log::warn;
35use zip::{ZipArchive, result::ZipError};
36
37use crate::{
38 error::EpubError,
39 types::{
40 EncryptionData, EpubVersion, ManifestItem, MetadataItem, MetadataLinkItem,
41 MetadataRefinement, NavPoint, SpineItem,
42 },
43 utils::{
44 DecodeBytes, NormalizeWhitespace, XmlElement, XmlReader, adobe_font_dencryption,
45 check_realtive_link_leakage, compression_method_check, get_file_in_zip_archive,
46 idpf_font_dencryption,
47 },
48};
49
50pub struct EpubDoc<R: Read + Seek> {
76 pub(crate) archive: Arc<Mutex<ZipArchive<R>>>,
78
79 pub(crate) epub_path: PathBuf,
81
82 pub package_path: PathBuf,
84
85 pub base_path: PathBuf,
87
88 pub version: EpubVersion,
90
91 pub unique_identifier: String,
95
96 pub metadata: Vec<MetadataItem>,
98
99 pub metadata_link: Vec<MetadataLinkItem>,
101
102 pub manifest: HashMap<String, ManifestItem>,
107
108 pub spine: Vec<SpineItem>,
113
114 pub encryption: Option<Vec<EncryptionData>>,
116
117 pub catalog: Vec<NavPoint>,
119
120 pub catalog_title: String,
122
123 current_spine_index: AtomicUsize,
125
126 has_encryption: bool,
128}
129
130impl<R: Read + Seek> EpubDoc<R> {
131 pub fn from_reader(reader: R, epub_path: PathBuf) -> Result<Self, EpubError> {
150 let mut archive = ZipArchive::new(reader).map_err(EpubError::from)?;
160 let epub_path = canonicalize(epub_path)?;
161
162 compression_method_check(&mut archive)?;
163
164 let container =
165 get_file_in_zip_archive(&mut archive, "META-INF/container.xml")?.decode()?;
166 let package_path = Self::parse_container(container)?;
167 let base_path = package_path
168 .parent()
169 .expect("所有文件的父目录不能为空")
170 .to_path_buf();
171
172 let opf_file =
173 get_file_in_zip_archive(&mut archive, package_path.to_str().unwrap())?.decode()?;
174 let package = XmlReader::parse(&opf_file)?;
175
176 let version = Self::determine_epub_version(&package)?;
177 let has_encryption = archive
178 .by_path(Path::new("META-INF/encryption.xml"))
179 .is_ok();
180
181 let mut doc = Self {
182 archive: Arc::new(Mutex::new(archive)),
183 epub_path,
184 package_path,
185 base_path,
186 version,
187 unique_identifier: String::new(),
188 metadata: vec![],
189 metadata_link: vec![],
190 manifest: HashMap::new(),
191 spine: vec![],
192 encryption: None,
193 catalog: vec![],
194 catalog_title: String::new(),
195 current_spine_index: AtomicUsize::new(0),
196 has_encryption,
197 };
198
199 let metadata_element = package.find_elements_by_name("metadata").next().unwrap();
200 let manifest_element = package.find_elements_by_name("manifest").next().unwrap();
201 let spine_element = package.find_elements_by_name("spine").next().unwrap();
202
203 doc.parse_metadata(metadata_element)?;
204 doc.parse_manifest(manifest_element)?;
205 doc.parse_spine(spine_element)?;
206 doc.parse_encryption()?;
207 doc.parse_catalog()?;
208
209 doc.unique_identifier = if let Some(uid) = package.get_attr("unique-identifier") {
211 doc.metadata.iter().find(|item| {
212 item.property == "identifier" && item.id.as_ref().is_some_and(|id| id == &uid)
213 })
214 } else {
215 doc.metadata
216 .iter()
217 .find(|item| item.property == "identifier")
218 }
219 .map(|item| item.value.clone())
220 .ok_or_else(|| EpubError::NonCanonicalFile {
221 tag: "dc:identifier".to_string(),
222 })?;
223
224 Ok(doc)
225 }
226
227 fn parse_container(data: String) -> Result<PathBuf, EpubError> {
243 let root = XmlReader::parse(&data)?;
244 let rootfile = root
245 .find_elements_by_name("rootfile")
246 .next()
247 .ok_or_else(|| EpubError::NonCanonicalFile {
248 tag: "rootfile".to_string(),
249 })?;
250
251 let attr =
252 rootfile
253 .get_attr("full-path")
254 .ok_or_else(|| EpubError::MissingRequiredAttribute {
255 tag: "rootfile".to_string(),
256 attribute: "full-path".to_string(),
257 })?;
258
259 Ok(PathBuf::from(attr))
260 }
261
262 fn parse_metadata(&mut self, metadata_element: &XmlElement) -> Result<(), EpubError> {
273 const DC_NAMESPACE: &str = "http://purl.org/dc/elements/1.1/";
274 const OPF_NAMESPACE: &str = "http://www.idpf.org/2007/opf";
275
276 let mut metadata = Vec::new();
277 let mut metadata_link = Vec::new();
278 let mut refinements = HashMap::<String, Vec<MetadataRefinement>>::new();
279
280 for element in metadata_element.children() {
281 match &element.namespace {
282 Some(namespace) if namespace == DC_NAMESPACE => {
283 self.parse_dc_metadata(element, &mut metadata)?
284 }
285
286 Some(namespace) if namespace == OPF_NAMESPACE => self.parse_opf_metadata(
287 element,
288 &mut metadata,
289 &mut metadata_link,
290 &mut refinements,
291 )?,
292
293 _ => {}
294 };
295 }
296
297 for item in metadata.iter_mut() {
298 if let Some(id) = &item.id {
299 if let Some(refinements) = refinements.remove(id) {
300 item.refined = refinements;
301 }
302 }
303 }
304
305 self.metadata = metadata;
306 self.metadata_link = metadata_link;
307 Ok(())
308 }
309
310 fn parse_manifest(&mut self, manifest_element: &XmlElement) -> Result<(), EpubError> {
320 let estimated_items = manifest_element.children().count();
321 let mut resources = HashMap::with_capacity(estimated_items);
322
323 for element in manifest_element.children() {
324 let id = element
325 .get_attr("id")
326 .ok_or_else(|| EpubError::MissingRequiredAttribute {
327 tag: element.tag_name(),
328 attribute: "id".to_string(),
329 })?
330 .to_string();
331 let path = element
332 .get_attr("href")
333 .ok_or_else(|| EpubError::MissingRequiredAttribute {
334 tag: element.tag_name(),
335 attribute: "href".to_string(),
336 })?
337 .to_string();
338 let mime = element
339 .get_attr("media-type")
340 .ok_or_else(|| EpubError::MissingRequiredAttribute {
341 tag: element.tag_name(),
342 attribute: "media-type".to_string(),
343 })?
344 .to_string();
345 let properties = element.get_attr("properties");
346 let fallback = element.get_attr("fallback");
347
348 resources.insert(
349 id.clone(),
350 ManifestItem {
351 id,
352 path: self.normalize_manifest_path(&path)?,
353 mime,
354 properties,
355 fallback,
356 },
357 );
358 }
359
360 self.manifest = resources;
361 self.validate_fallback_chains();
362 Ok(())
363 }
364
365 fn parse_spine(&mut self, spine_element: &XmlElement) -> Result<(), EpubError> {
375 let mut spine = Vec::new();
376 for element in spine_element.children() {
377 let idref = element
378 .get_attr("idref")
379 .ok_or_else(|| EpubError::MissingRequiredAttribute {
380 tag: element.tag_name(),
381 attribute: "idref".to_string(),
382 })?
383 .to_string();
384 let id = element.get_attr("id");
385 let linear = element
386 .get_attr("linear")
387 .map(|linear| linear == "yes")
388 .unwrap_or(true);
389 let properties = element.get_attr("properties");
390
391 spine.push(SpineItem {
392 idref,
393 id,
394 linear,
395 properties,
396 });
397 }
398
399 self.spine = spine;
400 Ok(())
401 }
402
403 fn parse_encryption(&mut self) -> Result<(), EpubError> {
413 if !self.has_encryption() {
414 return Ok(());
415 }
416
417 let mut archive = self.archive.lock()?;
418 let encryption_file =
419 get_file_in_zip_archive(&mut archive, "META-INF/encryption.xml")?.decode()?;
420
421 let root = XmlReader::parse(&encryption_file)?;
422
423 let mut encryption_data = Vec::new();
424 for data in root.children() {
425 if data.name != "EncryptedData" {
426 continue;
427 }
428
429 let method = data
430 .find_elements_by_name("EncryptionMethod")
431 .next()
432 .ok_or_else(|| EpubError::NonCanonicalFile {
433 tag: "EncryptionMethod".to_string(),
434 })?;
435 let reference = data
436 .find_elements_by_name("CipherReference")
437 .next()
438 .ok_or_else(|| EpubError::NonCanonicalFile {
439 tag: "CipherReference".to_string(),
440 })?;
441
442 encryption_data.push(EncryptionData {
443 method: method
444 .get_attr("Algorithm")
445 .ok_or_else(|| EpubError::MissingRequiredAttribute {
446 tag: "EncryptionMethod".to_string(),
447 attribute: "Algorithm".to_string(),
448 })?
449 .to_string(),
450 data: reference
451 .get_attr("URI")
452 .ok_or_else(|| EpubError::MissingRequiredAttribute {
453 tag: "CipherReference".to_string(),
454 attribute: "URI".to_string(),
455 })?
456 .to_string(),
457 });
458 }
459
460 if !encryption_data.is_empty() {
461 self.encryption = Some(encryption_data);
462 }
463
464 Ok(())
465 }
466
467 fn parse_catalog(&mut self) -> Result<(), EpubError> {
474 const HEAD_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];
475
476 let mut archive = self.archive.lock()?;
477 match self.version {
478 EpubVersion::Version2_0 => {
479 let opf_file =
480 get_file_in_zip_archive(&mut archive, self.package_path.to_str().unwrap())?
481 .decode()?;
482 let opf_element = XmlReader::parse(&opf_file)?;
483
484 let toc_id = opf_element
485 .find_children_by_name("spine")
486 .next()
487 .ok_or_else(|| EpubError::NonCanonicalFile {
488 tag: "spine".to_string(),
489 })?
490 .get_attr("toc")
491 .ok_or_else(|| EpubError::MissingRequiredAttribute {
492 tag: "spine".to_string(),
493 attribute: "toc".to_string(),
494 })?
495 .to_owned();
496 let toc_path = self
497 .manifest
498 .get(&toc_id)
499 .ok_or(EpubError::ResourceIdNotExist { id: toc_id })?
500 .path
501 .to_str()
502 .unwrap();
503
504 let ncx_file = get_file_in_zip_archive(&mut archive, toc_path)?.decode()?;
505 let ncx = XmlReader::parse(&ncx_file)?;
506
507 match ncx.find_elements_by_name("docTitle").next() {
508 Some(element) => self.catalog_title = element.text(),
509 None => warn!(
510 "Expecting to get docTitle information from the ncx file, but it's missing."
511 ),
512 };
513
514 let nav_map = ncx.find_elements_by_name("navMap").next().ok_or_else(|| {
515 EpubError::NonCanonicalFile {
516 tag: "navMap".to_string(),
517 }
518 })?;
519
520 self.catalog = self.parse_nav_points(nav_map)?;
521
522 Ok(())
523 }
524
525 EpubVersion::Version3_0 => {
526 let nav_path = self
527 .manifest
528 .values()
529 .find(|item| {
530 if let Some(property) = &item.properties {
531 return property.contains("nav");
532 }
533 false
534 })
535 .map(|item| item.path.clone())
536 .ok_or_else(|| EpubError::NonCanonicalEpub {
537 expected_file: "Navigation Document".to_string(),
538 })?;
539
540 let nav_file =
541 get_file_in_zip_archive(&mut archive, nav_path.to_str().unwrap())?.decode()?;
542
543 let nav_element = XmlReader::parse(&nav_file)?;
544 let nav = nav_element
545 .find_elements_by_name("nav")
546 .find(|&element| element.get_attr("epub:type") == Some(String::from("toc")))
547 .ok_or_else(|| EpubError::NonCanonicalFile {
548 tag: "nav".to_string(),
549 })?;
550 let nav_title = nav.find_children_by_names(&HEAD_TAGS).next();
551 let nav_list = nav.find_children_by_name("ol").next().ok_or_else(|| {
552 EpubError::NonCanonicalFile {
553 tag: "ol".to_string(),
554 }
555 })?;
556
557 self.catalog = self.parse_catalog_list(nav_list)?;
558 if let Some(nav_title) = nav_title {
559 self.catalog_title = nav_title.text();
560 };
561 Ok(())
562 }
563 }
564 }
565
566 pub fn has_encryption(&self) -> bool {
582 self.has_encryption
583 }
584
585 pub fn get_metadata(&self, key: &str) -> Option<Vec<MetadataItem>> {
599 let metadatas = self
600 .metadata
601 .iter()
602 .filter(|item| item.property == key)
603 .cloned()
604 .collect::<Vec<MetadataItem>>();
605
606 (!metadatas.is_empty()).then_some(metadatas)
607 }
608
609 pub fn get_metadata_value(&self, key: &str) -> Option<Vec<String>> {
621 let values = self
622 .metadata
623 .iter()
624 .filter(|item| item.property == key)
625 .map(|item| item.value.clone())
626 .collect::<Vec<String>>();
627
628 (!values.is_empty()).then_some(values)
629 }
630
631 pub fn get_title(&self) -> Result<Vec<String>, EpubError> {
644 self.get_metadata_value("title")
645 .ok_or_else(|| EpubError::NonCanonicalFile {
646 tag: "title".to_string(),
647 })
648 }
649
650 pub fn get_language(&self) -> Result<Vec<String>, EpubError> {
664 self.get_metadata_value("language")
665 .ok_or_else(|| EpubError::NonCanonicalFile {
666 tag: "language".to_string(),
667 })
668 }
669
670 pub fn get_identifier(&self) -> Result<Vec<String>, EpubError> {
686 self.get_metadata_value("identifier")
687 .ok_or_else(|| EpubError::NonCanonicalFile {
688 tag: "identifier".to_string(),
689 })
690 }
691
692 pub fn get_manifest_item(&self, id: &str) -> Result<(Vec<u8>, String), EpubError> {
709 let resource_item = self
710 .manifest
711 .get(id)
712 .cloned()
713 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
714
715 let path = resource_item.path.to_str().unwrap();
716
717 let mut archive = self.archive.lock()?;
718 let mut data = match archive.by_name(path) {
719 Ok(mut file) => {
720 let mut entry = Vec::<u8>::new();
721 file.read_to_end(&mut entry)?;
722
723 Ok(entry)
724 }
725 Err(ZipError::FileNotFound) => Err(EpubError::ResourceNotFound {
726 resource: path.to_string(),
727 }),
728 Err(err) => Err(EpubError::from(err)),
729 }?;
730
731 if let Some(method) = self.is_encryption_file(path) {
732 data = self.auto_dencrypt(&method, &mut data)?;
733 }
734
735 Ok((data, resource_item.mime))
736 }
737
738 pub fn get_manifest_item_by_path(&self, path: &str) -> Result<(Vec<u8>, String), EpubError> {
757 let id = self
758 .manifest
759 .iter()
760 .find(|(_, item)| item.path.to_str().unwrap() == path)
761 .map(|(id, _)| id.to_string())
762 .ok_or_else(|| EpubError::ResourceNotFound {
763 resource: path.to_string(),
764 })?;
765
766 self.get_manifest_item(&id)
767 }
768
769 pub fn get_manifest_item_with_fallback(
785 &self,
786 id: &str,
787 supported_format: Vec<&str>,
788 ) -> Result<(Vec<u8>, String), EpubError> {
789 let mut manifest_item = self
790 .manifest
791 .get(id)
792 .cloned()
793 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
794
795 let mut current_manifest_id = id.to_string();
796 let mut fallback_chain = Vec::<String>::new();
797 'fallback: loop {
798 if supported_format.contains(&manifest_item.mime.as_str()) {
799 return self.get_manifest_item(¤t_manifest_id);
800 }
801
802 let fallback_id = manifest_item.fallback.clone();
803
804 match fallback_id {
805 None => break 'fallback,
807
808 Some(id) if fallback_chain.contains(&id) => break 'fallback,
810
811 Some(id) => {
812 fallback_chain.push(id.clone());
813
814 manifest_item = self
818 .manifest
819 .get(&manifest_item.fallback.unwrap())
820 .cloned()
821 .ok_or(EpubError::ResourceIdNotExist { id: id.clone() })?;
822 current_manifest_id = id;
823 }
824 };
825 }
826
827 Err(EpubError::NoSupportedFileFormat)
828 }
829
830 pub fn get_cover(&self) -> Option<(Vec<u8>, String)> {
847 self.manifest
848 .values()
849 .filter_map(|manifest| {
850 if manifest.id.to_ascii_lowercase().contains("cover") {
851 return Some(manifest.id.clone());
852 }
853
854 if let Some(properties) = &manifest.properties {
855 if properties.to_ascii_lowercase().contains("cover") {
856 return Some(manifest.id.clone());
857 }
858 }
859
860 None
861 })
862 .collect::<Vec<String>>()
863 .iter()
864 .find_map(|id| self.get_manifest_item(id).ok())
865 }
866
867 pub fn navigate_by_spine_index(&mut self, index: usize) -> Option<(Vec<u8>, String)> {
886 if index >= self.spine.len() {
887 return None;
888 }
889
890 let manifest_id = self.spine[index].idref.clone();
891 self.current_spine_index.store(index, Ordering::SeqCst);
892 self.get_manifest_item(&manifest_id).ok()
893 }
894
895 pub fn spine_prev(&self) -> Option<(Vec<u8>, String)> {
907 let current_index = self.current_spine_index.load(Ordering::SeqCst);
908 if current_index == 0 || !self.spine[current_index].linear {
909 return None;
910 }
911
912 let prev_index = (0..current_index)
913 .rev()
914 .find(|&index| self.spine[index].linear)?;
915
916 self.current_spine_index.store(prev_index, Ordering::SeqCst);
917 let manifest_id = self.spine[prev_index].idref.clone();
918 self.get_manifest_item(&manifest_id).ok()
919 }
920
921 pub fn spine_next(&mut self) -> Option<(Vec<u8>, String)> {
933 let current_index = self.current_spine_index.load(Ordering::SeqCst);
934 if current_index >= self.spine.len() - 1 || !self.spine[current_index].linear {
935 return None;
936 }
937
938 let next_index =
939 (current_index + 1..self.spine.len()).find(|&index| self.spine[index].linear)?;
940
941 self.current_spine_index.store(next_index, Ordering::SeqCst);
942 let manifest_id = self.spine[next_index].idref.clone();
943 self.get_manifest_item(&manifest_id).ok()
944 }
945
946 pub fn spine_current(&self) -> Option<(Vec<u8>, String)> {
956 let manifest_id = self.spine[self.current_spine_index.load(Ordering::SeqCst)]
957 .idref
958 .clone();
959 self.get_manifest_item(&manifest_id).ok()
960 }
961
962 fn determine_epub_version(opf_element: &XmlElement) -> Result<EpubVersion, EpubError> {
972 if let Some(version) = opf_element.get_attr("version") {
974 match version.as_str() {
975 "2.0" => return Ok(EpubVersion::Version2_0),
976 "3.0" => return Ok(EpubVersion::Version3_0),
977 _ => {}
978 }
979 }
980
981 let spine_element = opf_element
982 .find_elements_by_name("spine")
983 .next()
984 .ok_or_else(|| EpubError::NonCanonicalFile {
985 tag: "spine".to_string(),
986 })?;
987
988 if spine_element.get_attr("toc").is_some() {
990 return Ok(EpubVersion::Version2_0);
991 }
992
993 let manifest_element = opf_element
994 .find_elements_by_name("manifest")
995 .next()
996 .ok_or_else(|| EpubError::NonCanonicalFile {
997 tag: "manifest".to_string(),
998 })?;
999
1000 manifest_element
1002 .children()
1003 .find_map(|element| {
1004 if let Some(id) = element.get_attr("id") {
1005 if id.eq("nav") {
1006 return Some(EpubVersion::Version3_0);
1007 }
1008 }
1009
1010 None
1011 })
1012 .ok_or(EpubError::UnrecognizedEpubVersion)
1013 }
1014
1015 #[inline]
1025 fn parse_dc_metadata(
1026 &self,
1027 element: &XmlElement,
1028 metadata: &mut Vec<MetadataItem>,
1029 ) -> Result<(), EpubError> {
1031 let id = element.get_attr("id");
1032 let lang = element.get_attr("lang");
1033 let property = element.name.clone();
1034 let value = element.text().normalize_whitespace();
1035
1036 let refined = match self.version {
1037 EpubVersion::Version2_0 => element
1040 .attributes
1041 .iter()
1042 .map(|(name, value)| {
1043 let property = name.to_string();
1044 let value = value.to_string().normalize_whitespace();
1045
1046 MetadataRefinement {
1047 refines: id.clone().unwrap(),
1048 property,
1049 value,
1050 lang: None,
1051 scheme: None,
1052 }
1053 })
1054 .collect(),
1055 EpubVersion::Version3_0 => vec![],
1056 };
1057
1058 metadata.push(MetadataItem {
1059 id,
1060 property,
1061 value,
1062 lang,
1063 refined,
1064 });
1065
1066 Ok(())
1067 }
1068
1069 #[inline]
1080 fn parse_opf_metadata(
1081 &self,
1082 element: &XmlElement,
1083 metadata: &mut Vec<MetadataItem>,
1084 metadata_link: &mut Vec<MetadataLinkItem>,
1085 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1086 ) -> Result<(), EpubError> {
1087 match element.name.as_str() {
1088 "meta" => self.parse_meta_element(element, metadata, refinements),
1089 "link" => self.parse_link_element(element, metadata_link),
1090 _ => Ok(()),
1091 }
1092 }
1093
1094 #[inline]
1095 fn parse_meta_element(
1096 &self,
1097 element: &XmlElement,
1098 metadata: &mut Vec<MetadataItem>,
1099 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1100 ) -> Result<(), EpubError> {
1101 match self.version {
1102 EpubVersion::Version2_0 => {
1103 let property =
1104 element
1105 .get_attr("name")
1106 .ok_or_else(|| EpubError::NonCanonicalFile {
1107 tag: element.tag_name(),
1108 })?;
1109 let value = element
1110 .get_attr("content")
1111 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1112 tag: element.tag_name(),
1113 attribute: "content".to_string(),
1114 })?
1115 .normalize_whitespace();
1116
1117 metadata.push(MetadataItem {
1118 id: None,
1119 property,
1120 value,
1121 lang: None,
1122 refined: vec![],
1123 });
1124 }
1125
1126 EpubVersion::Version3_0 => {
1127 let property = element.get_attr("property").ok_or_else(|| {
1128 EpubError::MissingRequiredAttribute {
1129 tag: element.tag_name(),
1130 attribute: "property".to_string(),
1131 }
1132 })?;
1133 let value = element.text().normalize_whitespace();
1134 let lang = element.get_attr("lang");
1135
1136 if let Some(refines) = element.get_attr("refines") {
1137 let id = refines.strip_prefix("#").unwrap_or(&refines).to_string();
1138 let scheme = element.get_attr("scheme");
1139 let refinement = MetadataRefinement {
1140 refines: id.clone(),
1141 property,
1142 value,
1143 lang,
1144 scheme,
1145 };
1146
1147 if let Some(refinements) = refinements.get_mut(&id) {
1148 refinements.push(refinement);
1149 } else {
1150 refinements.insert(id, vec![refinement]);
1151 }
1152 } else {
1153 let id = element.get_attr("id");
1154 let item = MetadataItem {
1155 id,
1156 property,
1157 value,
1158 lang,
1159 refined: vec![],
1160 };
1161
1162 metadata.push(item);
1163 };
1164 }
1165 }
1166 Ok(())
1167 }
1168
1169 #[inline]
1170 fn parse_link_element(
1171 &self,
1172 element: &XmlElement,
1173 metadata_link: &mut Vec<MetadataLinkItem>,
1174 ) -> Result<(), EpubError> {
1175 let href = element
1176 .get_attr("href")
1177 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1178 tag: element.tag_name(),
1179 attribute: "href".to_string(),
1180 })?;
1181 let rel = element
1182 .get_attr("rel")
1183 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1184 tag: element.tag_name(),
1185 attribute: "rel".to_string(),
1186 })?;
1187 let hreflang = element.get_attr("hreflang");
1188 let id = element.get_attr("id");
1189 let mime = element.get_attr("media-type");
1190 let properties = element.get_attr("properties");
1191
1192 metadata_link.push(MetadataLinkItem {
1193 href,
1194 rel,
1195 hreflang,
1196 id,
1197 mime,
1198 properties,
1199 refines: None,
1200 });
1201 Ok(())
1202 }
1203
1204 fn parse_nav_points(&self, parent_element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1210 let mut nav_points = Vec::new();
1211 for nav_point in parent_element.find_children_by_name("navPoint") {
1212 let label = match nav_point.find_children_by_name("navLabel").next() {
1213 Some(element) => element.text(),
1214 None => String::new(),
1215 };
1216
1217 let content = nav_point
1218 .find_children_by_name("content")
1219 .next()
1220 .map(|element| PathBuf::from(element.text()));
1221
1222 let play_order = nav_point
1223 .get_attr("playOrder")
1224 .and_then(|order| order.parse::<usize>().ok());
1225
1226 let children = self.parse_nav_points(nav_point)?;
1227
1228 nav_points.push(NavPoint {
1229 label,
1230 content,
1231 play_order,
1232 children,
1233 });
1234 }
1235
1236 nav_points.sort();
1237 Ok(nav_points)
1238 }
1239
1240 fn parse_catalog_list(&self, element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1246 let mut catalog = Vec::new();
1247 for item in element.children() {
1248 if item.tag_name() != "li" {
1249 return Err(EpubError::NonCanonicalFile {
1250 tag: "li".to_string(),
1251 });
1252 }
1253
1254 let title_element = item
1255 .find_children_by_names(&["span", "a"])
1256 .next()
1257 .ok_or_else(|| EpubError::NonCanonicalFile {
1258 tag: "span/a".to_string(),
1259 })?;
1260 let content_href = title_element.get_attr("href").map(PathBuf::from);
1261 let sub_list = if let Some(list) = item.find_children_by_name("ol").next() {
1262 self.parse_catalog_list(list)?
1263 } else {
1264 vec![]
1265 };
1266
1267 catalog.push(NavPoint {
1268 label: title_element.text(),
1269 content: content_href,
1270 children: sub_list,
1271 play_order: None,
1272 });
1273 }
1274
1275 Ok(catalog)
1276 }
1277
1278 #[inline]
1295 fn normalize_manifest_path(&self, path: &str) -> Result<PathBuf, EpubError> {
1296 let mut path = if path.starts_with("../") {
1297 let mut current_dir = self.epub_path.join(&self.package_path);
1298 current_dir.pop();
1299
1300 check_realtive_link_leakage(self.epub_path.clone(), current_dir, path)
1301 .map(PathBuf::from)
1302 .ok_or_else(|| EpubError::RealtiveLinkLeakage {
1303 path: path.to_string(),
1304 })?
1305 } else if let Some(path) = path.strip_prefix("/") {
1306 PathBuf::from(path.to_string())
1307 } else {
1308 self.base_path.join(path)
1309 };
1310
1311 #[cfg(windows)]
1312 {
1313 path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
1314 }
1315
1316 Ok(path)
1317 }
1318
1319 fn validate_fallback_chains(&self) {
1330 for (id, item) in &self.manifest {
1331 if item.fallback.is_none() {
1332 continue;
1333 }
1334
1335 let mut fallback_chain = Vec::new();
1336 if let Err(msg) = self.validate_fallback_chain(id, &mut fallback_chain) {
1337 warn!("Invalid fallback chain for item {}: {}", id, msg);
1338 }
1339 }
1340 }
1341
1342 fn validate_fallback_chain(
1356 &self,
1357 manifest_id: &str,
1358 fallback_chain: &mut Vec<String>,
1359 ) -> Result<(), String> {
1360 if fallback_chain.contains(&manifest_id.to_string()) {
1361 fallback_chain.push(manifest_id.to_string());
1362
1363 return Err(format!(
1364 "Circular reference detected in fallback chain for {}",
1365 fallback_chain.join("->")
1366 ));
1367 }
1368
1369 let item = self.manifest.get(manifest_id).unwrap();
1371
1372 if let Some(fallback_id) = &item.fallback {
1373 if !self.manifest.contains_key(fallback_id) {
1374 return Err(format!(
1375 "Fallback resource {} does not exist in manifest",
1376 fallback_id
1377 ));
1378 }
1379
1380 fallback_chain.push(manifest_id.to_string());
1381 self.validate_fallback_chain(fallback_id, fallback_chain)
1382 } else {
1383 Ok(())
1385 }
1386 }
1387
1388 fn is_encryption_file(&self, path: &str) -> Option<String> {
1401 self.encryption.as_ref().and_then(|encryptions| {
1402 encryptions
1403 .iter()
1404 .find(|encryption| encryption.data == path)
1405 .map(|encryption| encryption.method.clone())
1406 })
1407 }
1408
1409 #[inline]
1427 fn auto_dencrypt(&self, method: &str, data: &mut [u8]) -> Result<Vec<u8>, EpubError> {
1428 match method {
1429 "http://www.idpf.org/2008/embedding" => {
1430 Ok(idpf_font_dencryption(data, &self.unique_identifier))
1431 }
1432 "http://ns.adobe.com/pdf/enc#RC" => {
1433 Ok(adobe_font_dencryption(data, &self.unique_identifier))
1434 }
1435 _ => Err(EpubError::UnsupportedEncryptedMethod {
1436 method: method.to_string(),
1437 }),
1438 }
1439 }
1440}
1441
1442impl EpubDoc<BufReader<File>> {
1443 pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
1455 let file = File::open(&path).map_err(EpubError::from)?;
1456 let path = canonicalize(path)?;
1457
1458 Self::from_reader(BufReader::new(file), path)
1459 }
1460}
1461
1462#[cfg(test)]
1463mod tests {
1464 use std::{
1465 fs::File,
1466 io::BufReader,
1467 path::{Path, PathBuf},
1468 };
1469
1470 use crate::{epub::EpubDoc, error::EpubError, utils::XmlReader};
1471
1472 mod package_documents_tests {
1474 use std::{path::Path, sync::atomic::Ordering};
1475
1476 use crate::epub::{EpubDoc, EpubVersion};
1477
/// Opening the `pkg-collections-unknown` fixture must succeed.
#[test]
fn test_pkg_collections_unknown() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-collections-unknown.epub"));
    assert!(opened.is_ok());
}
1487
/// The `creator` values of the `pkg-creator-order` fixture are returned in
/// the expected order.
#[test]
fn test_pkg_creator_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-creator-order.epub"));
    assert!(opened.is_ok());

    let creators = opened.unwrap().get_metadata_value("creator");
    assert!(creators.is_some());

    let creators = creators.unwrap();
    let expected = vec![
        "Dave Cramer",
        "Wendy Reid",
        "Dan Lazin",
        "Ivan Herman",
        "Brady Duga",
    ];
    assert_eq!(creators.len(), 5);
    assert_eq!(creators, expected);
}
1514
/// The `pkg-manifest-unknown` fixture exposes exactly the `nav` and
/// `content_001` manifest items; `content_002` cannot be loaded.
#[test]
fn test_pkg_manifest_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-manifest-unknown.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    assert_eq!(doc.manifest.len(), 2);
    for present in ["nav", "content_001"] {
        assert!(doc.get_manifest_item(present).is_ok());
    }
    assert!(doc.get_manifest_item("content_002").is_err());
}
1530
/// The `pkg-meta-unknown` fixture yields single values for
/// `dcterms:isReferencedBy` and `dcterms:modified`, and none for
/// `dcterms:title`.
#[test]
fn test_pkg_meta_unknown() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-meta-unknown.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let referenced_by = doc.get_metadata_value("dcterms:isReferencedBy");
    assert!(referenced_by.is_some());
    let referenced_by = referenced_by.unwrap();
    assert_eq!(referenced_by.len(), 1);
    assert_eq!(
        referenced_by,
        vec!["https://www.w3.org/TR/epub-rs/#confreq-rs-pkg-meta-unknown"]
    );

    let modified = doc.get_metadata_value("dcterms:modified");
    assert!(modified.is_some());
    let modified = modified.unwrap();
    assert_eq!(modified.len(), 1);
    assert_eq!(modified, vec!["2021-01-11T00:00:00Z"]);

    assert!(doc.get_metadata_value("dcterms:title").is_none());
}
1559
/// Metadata values of the `pkg-meta-whitespace` fixture come back with
/// their whitespace normalized.
#[test]
fn test_pkg_meta_white_space() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-meta-whitespace.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    let creator = doc.get_metadata_value("creator");
    assert!(creator.is_some());
    let creator = creator.unwrap();
    assert_eq!(creator.len(), 1);
    assert_eq!(creator, vec!["Dave Cramer"]);

    let description = doc.get_metadata_value("description");
    assert!(description.is_some());
    let description = description.unwrap();
    let expected = vec![
        "The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases.",
    ];
    assert_eq!(description.len(), 1);
    assert_eq!(description, expected);
}
1587
/// Navigating to each of the four spine positions of the
/// `pkg-spine-duplicate-item-hyperlink` fixture returns the same data as
/// loading the expected manifest item directly.
#[test]
fn test_pkg_spine_duplicate_item_hyperlink() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pkg-spine-duplicate-item-hyperlink.epub",
    ));
    assert!(opened.is_ok());
    let mut doc = opened.unwrap();

    assert_eq!(doc.spine.len(), 4);

    let expected = [
        (0, "content_001"),
        (1, "content_002"),
        (2, "content_002"),
        (3, "content_002"),
    ];
    for (index, manifest_id) in expected {
        assert_eq!(
            doc.navigate_by_spine_index(index).unwrap(),
            doc.get_manifest_item(manifest_id).unwrap()
        );
    }
}
1616
/// Stepping through the `pkg-spine-duplicate-item-rendering` fixture: no
/// previous item at the start, three successful forward steps, then `None`.
#[test]
fn test_pkg_spine_duplicate_item_rendering() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pkg-spine-duplicate-item-rendering.epub",
    ));
    assert!(opened.is_ok());
    let mut doc = opened.unwrap();

    assert_eq!(doc.spine.len(), 4);

    assert!(doc.spine_prev().is_none());
    assert!(doc.spine_next().is_some());

    // Advance to the last entry; the step after that must yield nothing.
    doc.spine_next();
    doc.spine_next();
    assert!(doc.spine_next().is_none());
}
1640
/// In the `pkg-spine-nonlinear-activation` fixture neither `spine_prev` nor
/// `spine_next` moves, both initially and after jumping to index 1.
#[test]
fn test_pkg_spine_nonlinear_activation() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-nonlinear-activation.epub"));
    assert!(opened.is_ok());
    let mut doc = opened.unwrap();

    assert!(doc.spine_prev().is_none());
    assert!(doc.spine_next().is_none());

    let jumped = doc.navigate_by_spine_index(1);
    assert!(jumped.is_some());
    assert!(doc.spine_prev().is_none());
    assert!(doc.spine_next().is_none());
}
1658
/// The spine of the `pkg-spine-order` fixture keeps its document order.
#[test]
fn test_pkg_spine_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-order.epub"));
    assert!(opened.is_ok());
    let doc = opened.unwrap();

    assert_eq!(doc.spine.len(), 4);

    let mut idrefs = Vec::<String>::new();
    for item in &doc.spine {
        idrefs.push(item.idref.clone());
    }
    assert_eq!(
        idrefs,
        vec![
            "d-content_001",
            "c-content_002",
            "b-content_003",
            "a-content_004",
        ]
    );
}
1683
/// Iterates the spine of `pkg-spine-order-svg.epub` with `spine_next` and
/// checks that every returned item equals the manifest item referenced by the
/// spine entry at the current index; the walk must end on index 3.
#[test]
fn test_spine_order_svg() {
    let epub_file = Path::new("./test_case/pkg-spine-order-svg.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    let mut doc = doc.unwrap();
    assert_eq!(doc.spine.len(), 4);

    // `while let` replaces the manual `loop { if let … else { break } }`.
    while let Some(spine) = doc.spine_next() {
        let index = doc.current_spine_index.load(Ordering::Relaxed);

        // Borrow the idref in place; no clone is needed for the lookup.
        let resource = doc.get_manifest_item(&doc.spine[index].idref);
        assert!(resource.is_ok());

        let resource = resource.unwrap();
        assert_eq!(spine, resource);
    }

    assert_eq!(doc.current_spine_index.load(Ordering::Relaxed), 3);
}
1713
/// Checks the single spine entry parsed from `pkg-spine-unknown.epub`:
/// its `idref`, missing `id`, `linear` flag and `properties` value.
#[test]
fn test_pkg_spine_unknown() {
    let epub_file = Path::new("./test_case/pkg-spine-unknown.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    let doc = doc.unwrap();
    assert_eq!(doc.spine.len(), 1);

    let item = &doc.spine[0];
    assert_eq!(item.idref, "content_001");
    assert_eq!(item.id, None);
    // Assert the boolean directly instead of comparing it with `true`.
    assert!(item.linear);
    // Compare as `Option<&str>` so no temporary `String` is allocated.
    assert_eq!(item.properties.as_deref(), Some("untrustworthy"));
}
1730
/// Checks that `get_title` on `pkg-title-order.epub` returns six titles in
/// the expected order.
#[test]
fn test_pkg_title_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-title-order.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let titles = book.get_title();
    assert!(titles.is_ok());
    let titles = titles.unwrap();

    let expected = vec![
        "pkg-title-order",
        "This title must not display first",
        "Also, this title must not display first",
        "This title also must not display first",
        "This title must also not display first",
        "This title must not display first, also",
    ];
    assert_eq!(titles.len(), 6);
    assert_eq!(titles, expected);
}
1758
/// Opens `pkg-unique-id.epub` and `pkg-unique-id_duplicate.epub` and checks
/// that both report the same identifier and the same `unique_identifier`.
#[test]
fn test_pkg_unique_id() {
    let first = EpubDoc::new(Path::new("./test_case/pkg-unique-id.epub"));
    assert!(first.is_ok());

    let second = EpubDoc::new(Path::new("./test_case/pkg-unique-id_duplicate.epub"));
    assert!(second.is_ok());

    let first = first.unwrap();
    let second = second.unwrap();

    let first_identifier = first.get_identifier().unwrap();
    let second_identifier = second.get_identifier().unwrap();
    assert_eq!(first_identifier, second_identifier);

    for book in [&first, &second] {
        assert_eq!(book.unique_identifier, "pkg-unique-id");
    }
}
1782
/// Checks that `pkg-version-backward.epub` is reported as
/// `EpubVersion::Version3_0`.
#[test]
fn test_pkg_version_backward() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-version-backward.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let detected = &book.version;
    assert_eq!(*detected, EpubVersion::Version3_0);
}
1795
/// Checks that `pkg-linked-records.epub` exposes three metadata links and
/// that one of them has `properties` equal to `"onix"`.
#[test]
fn test_pkg_linked_records() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-linked-records.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.metadata_link.len(), 3);

    // First link whose optional `properties` is present and equals "onix".
    let onix_link = book
        .metadata_link
        .iter()
        .find(|link| matches!(&link.properties, Some(properties) if properties.eq("onix")));
    assert!(onix_link.is_some());
}
1817
/// Checks that `get_manifest_item_by_path` succeeds for
/// `EPUB/content_001.xhtml` but fails, with the expected message, for
/// `EPUB/red.png` in `pkg-manifest-unlisted-resource.epub`.
#[test]
fn test_pkg_manifest_unlisted_resource() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pkg-manifest-unlisted-resource.epub",
    ));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let listed = book.get_manifest_item_by_path("EPUB/content_001.xhtml");
    assert!(listed.is_ok());

    let unlisted = book.get_manifest_item_by_path("EPUB/red.png");
    assert!(unlisted.is_err());

    // Look the path up a second time to inspect the error text.
    let message = book
        .get_manifest_item_by_path("EPUB/red.png")
        .unwrap_err()
        .to_string();
    assert_eq!(
        message,
        "Resource not found: Unable to find resource from \"EPUB/red.png\"."
    );
}
1840 }
1841
1842 mod manifest_fallbacks_tests {
1846 use std::path::Path;
1847
1848 use crate::epub::EpubDoc;
1849
/// Checks that, for `pub-foreign_bad-fallback.epub`, both manifest items can
/// be read but a fallback lookup restricted to `application/xhtml+xml` fails
/// with the "No supported file format" error.
#[test]
fn test_pub_foreign_bad_fallback() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_bad-fallback.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let content = book.get_manifest_item("content_001");
    assert!(content.is_ok());
    let bar = book.get_manifest_item("bar");
    assert!(bar.is_ok());

    let failure = book
        .get_manifest_item_with_fallback("content_001", vec!["application/xhtml+xml"])
        .unwrap_err();
    assert_eq!(
        failure.to_string(),
        "No supported file format: The fallback resource does not contain the file format you support."
    );
}
1870
/// Checks that the fallback lookup for `image-tiff` in
/// `pub-foreign_image.epub` resolves to an `image/png` resource.
#[test]
fn test_pub_foreign_image() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_image.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let supported = vec!["image/png", "application/xhtml+xml"];
    let lookup = book.get_manifest_item_with_fallback("image-tiff", supported);
    assert!(lookup.is_ok());

    // Only the reported MIME type is checked; the payload is ignored.
    let mime = lookup.unwrap().1;
    assert_eq!(mime, "image/png");
}
1890
/// Checks which MIME type the fallback lookup for `content_primary` reports
/// in `pub-foreign_json-spine.epub` for two different supported-format lists.
#[test]
fn test_pub_foreign_json_spine() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_json-spine.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // (supported formats, MIME type the lookup is expected to report)
    let cases = [
        (
            vec!["application/xhtml+xml", "application/json"],
            "application/json",
        ),
        (vec!["application/xhtml+xml"], "application/xhtml+xml"),
    ];

    for (supported, expected_mime) in cases {
        let lookup = book.get_manifest_item_with_fallback("content_primary", supported);
        assert!(lookup.is_ok());
        let (_, mime) = lookup.unwrap();
        assert_eq!(mime, expected_mime);
    }
}
1915
/// Checks which MIME type the fallback lookup for `content_primary` reports
/// in `pub-foreign_xml-spine.epub` for two different supported-format lists.
#[test]
fn test_pub_foreign_xml_spine() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_xml-spine.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // (supported formats, MIME type the lookup is expected to report)
    let cases = [
        (
            vec!["application/xhtml+xml", "application/xml"],
            "application/xml",
        ),
        (vec!["application/xhtml+xml"], "application/xhtml+xml"),
    ];

    for (supported, expected_mime) in cases {
        let lookup = book.get_manifest_item_with_fallback("content_primary", supported);
        assert!(lookup.is_ok());
        let (_, mime) = lookup.unwrap();
        assert_eq!(mime, expected_mime);
    }
}
1940
/// Checks which MIME type the fallback lookup for `content_primary` reports
/// in `pub-foreign_xml-suffix-spine.epub` for two different supported-format
/// lists.
#[test]
fn test_pub_foreign_xml_suffix_spine() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pub-foreign_xml-suffix-spine.epub",
    ));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // (supported formats, MIME type the lookup is expected to report)
    let cases = [
        (
            vec!["application/xhtml+xml", "application/dtc+xml"],
            "application/dtc+xml",
        ),
        (vec!["application/xhtml+xml"], "application/xhtml+xml"),
    ];

    for (supported, expected_mime) in cases {
        let lookup = book.get_manifest_item_with_fallback("content_primary", supported);
        assert!(lookup.is_ok());
        let (_, mime) = lookup.unwrap();
        assert_eq!(mime, expected_mime);
    }
}
1965 }
1966
1967 mod open_container_format_tests {
1969 use std::{cmp::min, io::Read, path::Path};
1970
1971 use sha1::{Digest, Sha1};
1972
1973 use crate::epub::EpubDoc;
1974
/// Checks that `EpubDoc::new` succeeds on the `ocf-metainf-inc.epub` fixture.
#[test]
fn test_ocf_metainf_inc() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-metainf-inc.epub"));
    assert!(opened.is_ok());
}
1984
/// Checks that `EpubDoc::new` succeeds on the `ocf-metainf-manifest.epub`
/// fixture.
#[test]
fn test_ocf_metainf_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-metainf-manifest.epub"));
    assert!(opened.is_ok());
}
1994
/// Checks that the package document of `ocf-package_arbitrary.epub` is
/// located at `FOO/BAR/package.opf`.
#[test]
fn test_ocf_package_arbitrary() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_arbitrary.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let expected_package = Path::new("FOO/BAR/package.opf");
    assert_eq!(book.package_path, expected_package);
}
2007
/// Checks the `package_path` and `base_path` resolved for
/// `ocf-package_multiple.epub`.
#[test]
fn test_ocf_package_multiple() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_multiple.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let expected_package = Path::new("FOO/BAR/package.opf");
    let expected_base = Path::new("FOO/BAR");
    assert_eq!(book.package_path, expected_package);
    assert_eq!(book.base_path, expected_base);
}
2021
/// Checks that opening `ocf-url_link-leaking-relative.epub` fails with the
/// "Relative link leakage" error for the offending path.
#[test]
fn test_ocf_url_link_leaking_relative() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/ocf-url_link-leaking-relative.epub",
    ));
    assert!(opened.is_err());

    let message = opened.err().unwrap().to_string();
    let expected = String::from(
        "Relative link leakage: Path \"../../../../media/imgs/monastery.jpg\" is out of container range.",
    );
    assert_eq!(message, expected)
}
2037
/// Checks that the `photo` manifest entry of
/// `ocf-url_link-path-absolute.epub` resolves to `media/imgs/monastery.jpg`.
#[test]
fn test_ocf_url_link_path_absolute() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-path-absolute.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let photo = book.manifest.get("photo").unwrap();
    let expected = Path::new("media/imgs/monastery.jpg");
    assert_eq!(photo.path, expected);
}
2051
/// Checks that the `photo` manifest entry of `ocf-url_link-relative.epub`
/// resolves to `media/imgs/monastery.jpg`.
#[test]
fn test_ocf_url_link_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-relative.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let photo = book.manifest.get("photo").unwrap();
    let expected = Path::new("media/imgs/monastery.jpg");
    assert_eq!(photo.path, expected);
}
2065
/// Checks which manifest items of `ocf-url_manifest.epub` can be read:
/// `nav` and `content_001` succeed, `content_002` fails.
#[test]
fn test_ocf_url_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_manifest.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let nav = book.get_manifest_item("nav");
    assert!(nav.is_ok());

    let first = book.get_manifest_item("content_001");
    assert!(first.is_ok());

    let second = book.get_manifest_item("content_002");
    assert!(second.is_err());
}
2080
/// Checks the package/base paths and the resolved manifest paths of
/// `ocf-url_relative.epub`, and that both manifest items can be read.
#[test]
fn test_ocf_url_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_relative.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // Location of the package document and its base directory.
    assert_eq!(book.package_path, Path::new("foo/BAR/baz.opf"));
    assert_eq!(book.base_path, Path::new("foo/BAR"));

    // Resolved paths of the manifest entries.
    let nav = book.manifest.get("nav").unwrap();
    assert_eq!(nav.path, Path::new("foo/BAR/nav.xhtml"));

    let content = book.manifest.get("content_001").unwrap();
    assert_eq!(content.path, Path::new("foo/BAR/qux/content_001.xhtml"));

    // Both entries must also be readable.
    let nav_item = book.get_manifest_item("nav");
    assert!(nav_item.is_ok());
    let content_item = book.get_manifest_item("content_001");
    assert!(content_item.is_ok());
}
2104
/// Checks that `EpubDoc::new` succeeds on the `ocf-zip-comp.epub` fixture.
#[test]
fn test_ocf_zip_comp() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-zip-comp.epub"));
    assert!(opened.is_ok());
}
2115
/// Checks that `EpubDoc::new` succeeds on the `ocf-zip-mult.epub` fixture.
#[test]
fn test_ocf_zip_mult() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-zip-mult.epub"));
    assert!(opened.is_ok());
}
2126
/// Reads the obfuscated font listed in the encryption data of
/// `ocf-font_obfuscation.epub`, XORs its leading bytes with a key derived
/// from the SHA-1 digest of the document's `unique_identifier`, and checks
/// that the result starts with a recognized font signature.
#[test]
fn test_ocf_font_obfuscation() {
    /// Number of leading bytes that take part in the XOR step.
    const OBFUSCATED_PREFIX_LEN: usize = 1040;

    let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());
    let doc = doc.unwrap();

    // Key: the digest bytes repeated until the whole prefix is covered.
    let mut hasher = Sha1::new();
    hasher.update(doc.unique_identifier.as_bytes());
    let hash = hasher.finalize();
    let key: Vec<u8> = hash
        .iter()
        .copied()
        .cycle()
        .take(OBFUSCATED_PREFIX_LEN)
        .collect();

    // Borrow the encryption list instead of moving it out of `doc`.
    assert!(doc.encryption.is_some());
    let encryption = doc.encryption.as_ref().unwrap();
    assert_eq!(encryption.len(), 1);

    let data = &encryption[0];
    assert_eq!(data.method, "http://www.idpf.org/2008/embedding");

    // Read the whole entry in one call rather than byte by byte.
    let mut font_file = Vec::new();
    let read = doc
        .archive
        .lock()
        .unwrap()
        .by_name(&data.data)
        .unwrap()
        .read_to_end(&mut font_file);
    assert!(read.is_ok());

    // XOR in place; files shorter than the prefix are handled by `min`.
    let prefix_len = min(OBFUSCATED_PREFIX_LEN, font_file.len());
    for (byte, key_byte) in font_file[..prefix_len].iter_mut().zip(&key) {
        *byte ^= *key_byte;
    }

    assert!(is_valid_font(&font_file));
}
2172
/// Counterpart of the font-obfuscation test: XORs the obfuscated font of
/// `ocf-font_obfuscation_bis.epub` with a key derived from a deliberately
/// wrong identifier and checks that the result is NOT recognized as a font.
#[test]
fn test_ocf_font_obfuscation_bis() {
    /// Number of leading bytes that take part in the XOR step.
    const OBFUSCATED_PREFIX_LEN: usize = 1040;

    let epub_file = Path::new("./test_case/ocf-font_obfuscation_bis.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());
    let doc = doc.unwrap();

    // Key built from an identifier that is not the document's own.
    let wrong_unique_id = "wrong-publication-id";
    let mut hasher = Sha1::new();
    hasher.update(wrong_unique_id.as_bytes());
    let hash = hasher.finalize();
    let wrong_key: Vec<u8> = hash
        .iter()
        .copied()
        .cycle()
        .take(OBFUSCATED_PREFIX_LEN)
        .collect();

    // Borrow the encryption list instead of moving it out of `doc`.
    assert!(doc.encryption.is_some());
    let encryption = doc.encryption.as_ref().unwrap();
    assert_eq!(encryption.len(), 1);

    let data = &encryption[0];
    assert_eq!(data.method, "http://www.idpf.org/2008/embedding");

    // Read the whole entry in one call rather than byte by byte.
    let mut font_file = Vec::new();
    let read = doc
        .archive
        .lock()
        .unwrap()
        .by_name(&data.data)
        .unwrap()
        .read_to_end(&mut font_file);
    assert!(read.is_ok());

    // XOR in place; uses the module's imported `min` like the sibling test.
    let prefix_len = min(OBFUSCATED_PREFIX_LEN, font_file.len());
    for (byte, key_byte) in font_file[..prefix_len].iter_mut().zip(&wrong_key) {
        *byte ^= *key_byte;
    }

    assert!(!is_valid_font(&font_file));
}
2218
/// Returns `true` when `data` begins with one of the accepted four-byte
/// signatures; input shorter than four bytes is never accepted.
fn is_valid_font(data: &[u8]) -> bool {
    // Accepted leading four-byte signatures.
    const SIGNATURES: [&[u8]; 5] = [
        b"OTTO",
        b"\x00\x01\x00\x00",
        b"\x00\x02\x00\x00",
        b"true",
        b"typ1",
    ];

    SIGNATURES
        .iter()
        .any(|signature| data.starts_with(signature))
}
2232 }
2233
/// Exercises `EpubDoc::parse_container` with three hand-written
/// `container.xml` documents: one without any `rootfile`, one whose
/// `rootfile` lacks `full-path`, and one that is complete.
#[test]
fn test_parse_container() {
    // The fixture is only opened and checked for success; `parse_container`
    // is invoked below as an associated function on inline XML.
    let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    // Case 1: `<rootfiles>` is empty -> `NonCanonicalFile` for tag "rootfile".
    let container = r#"
        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
            <rootfiles></rootfiles>
        </container>
        "#
    .to_string();

    let result = EpubDoc::<BufReader<File>>::parse_container(container);
    assert!(result.is_err());
    assert_eq!(
        result.unwrap_err(),
        EpubError::NonCanonicalFile {
            tag: "rootfile".to_string()
        }
    );

    // Case 2: `rootfile` without `full-path` -> `MissingRequiredAttribute`.
    let container = r#"
        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
            <rootfiles>
                <rootfile media-type="application/oebps-package+xml"/>
            </rootfiles>
        </container>
        "#
    .to_string();

    let result = EpubDoc::<BufReader<File>>::parse_container(container);
    assert!(result.is_err());
    assert_eq!(
        result.unwrap_err(),
        EpubError::MissingRequiredAttribute {
            tag: "rootfile".to_string(),
            attribute: "full-path".to_string(),
        }
    );

    // Case 3: complete `rootfile` -> the `full-path` value is returned.
    let container = r#"
        <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
            <rootfiles>
                <rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
            </rootfiles>
        </container>
        "#
    .to_string();

    let result = EpubDoc::<BufReader<File>>::parse_container(container);
    assert!(result.is_ok());
    assert_eq!(result.unwrap(), PathBuf::from("EPUB/content.opf"))
}
2289
/// Exercises `parse_manifest` on an opened document with four inline
/// `<manifest>` fragments: items missing `id`, missing `href`, missing
/// `media-type`, and finally complete items.
#[test]
fn test_parse_manifest() {
    let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    // Case 1: items without `id` -> `MissingRequiredAttribute { attribute: "id" }`.
    let manifest = r#"
        <manifest>
            <item href="content_001.xhtml" media-type="application/xhtml+xml"/>
            <item properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
        </manifest>
        "#;
    // The same mutable document is reused for every `parse_manifest` call below.
    let mut doc = doc.unwrap();
    let element = XmlReader::parse(manifest);
    assert!(element.is_ok());

    let element = element.unwrap();
    let result = doc.parse_manifest(&element);
    assert!(result.is_err());
    assert_eq!(
        result.unwrap_err(),
        EpubError::MissingRequiredAttribute {
            tag: "item".to_string(),
            attribute: "id".to_string(),
        },
    );

    // Case 2: items without `href` -> `MissingRequiredAttribute { attribute: "href" }`.
    let manifest = r#"
        <manifest>
            <item id="content_001" media-type="application/xhtml+xml"/>
            <item id="nav" properties="nav" media-type="application/xhtml+xml"/>
        </manifest>
        "#;
    let element = XmlReader::parse(manifest);
    assert!(element.is_ok());

    let element = element.unwrap();
    let result = doc.parse_manifest(&element);
    assert!(result.is_err());
    assert_eq!(
        result.unwrap_err(),
        EpubError::MissingRequiredAttribute {
            tag: "item".to_string(),
            attribute: "href".to_string(),
        },
    );

    // Case 3: items without `media-type` ->
    // `MissingRequiredAttribute { attribute: "media-type" }`.
    let manifest = r#"
        <manifest>
            <item id="content_001" href="content_001.xhtml"/>
            <item id="nav" properties="nav" href="nav.xhtml"/>
        </manifest>
        "#;
    let element = XmlReader::parse(manifest);
    assert!(element.is_ok());

    let element = element.unwrap();
    let result = doc.parse_manifest(&element);
    assert!(result.is_err());
    assert_eq!(
        result.unwrap_err(),
        EpubError::MissingRequiredAttribute {
            tag: "item".to_string(),
            attribute: "media-type".to_string(),
        },
    );

    // Case 4: every item carries `id`, `href` and `media-type` -> parsing succeeds.
    let manifest = r#"
        <manifest>
            <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
            <item id="nav" properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
        </manifest>
        "#;
    let element = XmlReader::parse(manifest);
    assert!(element.is_ok());

    let element = element.unwrap();
    let result = doc.parse_manifest(&element);
    assert!(result.is_ok());
}
2370
/// Checks that `has_encryption` reports `true` for
/// `ocf-font_obfuscation.epub`.
#[test]
fn test_fn_has_encryption() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-font_obfuscation.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let encrypted = book.has_encryption();
    assert!(encrypted);
}
2381
/// Checks the single encryption entry parsed from
/// `ocf-font_obfuscation.epub`: its method URI and the path it refers to.
#[test]
fn test_fn_parse_encryption() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-font_obfuscation.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert!(book.encryption.is_some());

    let entries = book.encryption.unwrap();
    assert_eq!(entries.len(), 1);

    let entry = &entries[0];
    assert_eq!(entry.method, "http://www.idpf.org/2008/embedding");
    assert_eq!(entry.data, "EPUB/fonts/Lobster.ttf");
}
2397
/// Checks that `get_metadata` returns exactly one item each for `title` and
/// `language` in `epub-33.epub`, and that `get_language` agrees.
#[test]
fn test_get_metadata_existing_key() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // (metadata key, value the single matching item is expected to hold)
    for (key, expected_value) in [("title", "EPUB 3.3"), ("language", "en-us")] {
        let items = book.get_metadata(key);
        assert!(items.is_some());

        let items = items.unwrap();
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].property, key);
        assert_eq!(items[0].value, expected_value);
    }

    let languages = book.get_language();
    assert!(languages.is_ok());
    assert_eq!(languages.unwrap(), vec!["en-us"]);
}
2426
/// Checks that `get_metadata` returns `None` for a key that `epub-33.epub`
/// does not contain.
#[test]
fn test_get_metadata_nonexistent_key() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert!(book.get_metadata("nonexistent").is_none());
}
2437
/// Checks that `get_metadata("creator")` on `epub-33.epub` returns the three
/// creators, in order, with the expected ids and values.
#[test]
fn test_get_metadata_multiple_items_same_type() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let creators = book.get_metadata("creator");
    assert!(creators.is_some());

    let creators = creators.unwrap();
    assert_eq!(creators.len(), 3);

    // (expected id, expected value), in document order.
    let expected = [
        ("creator_id_0", "Matt Garrish, DAISY Consortium"),
        ("creator_id_1", "Ivan Herman, W3C"),
        ("creator_id_2", "Dave Cramer, Invited Expert"),
    ];

    for (creator, (id, value)) in creators.iter().zip(expected) {
        assert_eq!(creator.id, Some(id.to_string()));
        assert_eq!(creator.property, "creator");
        assert_eq!(creator.value, value);
    }
}
2464
/// Checks that the single `title` item of `epub-33.epub` carries one
/// refinement with property `title-type` and value `main`.
#[test]
fn test_get_metadata_with_refinement() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let titles = book.get_metadata("title");
    assert!(titles.is_some());

    let titles = titles.unwrap();
    assert_eq!(titles.len(), 1);

    let refinements = &titles[0].refined;
    assert_eq!(refinements.len(), 1);

    let refinement = &refinements[0];
    assert_eq!(refinement.property, "title-type");
    assert_eq!(refinement.value, "main");
}
2482
/// Checks `get_manifest_item_with_fallback` on `pub-foreign_bad-fallback.epub`:
/// a lookup that accepts `image/psd` must succeed with that MIME type, while
/// a lookup restricted to `application/xhtml+xml` must fail with the
/// "No supported file format" error.
#[test]
fn test_get_manifest_item_with_fallback() {
    let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    let doc = doc.unwrap();
    assert!(doc.get_manifest_item("content_001").is_ok());
    assert!(doc.get_manifest_item("bar").is_ok());

    // Report the actual error on failure instead of asserting on a constant.
    match doc.get_manifest_item_with_fallback("content_001", vec!["image/psd"]) {
        Ok((_, mime)) => assert_eq!(mime, "image/psd"),
        Err(err) => panic!("get_manifest_item_with_fallback failed: {err}"),
    }

    assert_eq!(
        doc.get_manifest_item_with_fallback("content_001", vec!["application/xhtml+xml"])
            .unwrap_err()
            .to_string(),
        "No supported file format: The fallback resource does not contain the file format you support."
    );
}
2509
/// Checks that `get_cover` on `pkg-cover-image.epub` returns 5785 bytes of
/// `image/jpeg` data.
#[test]
fn test_get_cover() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-cover-image.epub"));

    // Print the error, if any, before the assertion below fails.
    if let Some(err) = opened.as_ref().err() {
        println!("{}", err);
    }
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let cover = book.get_cover();
    assert!(cover.is_some());

    let (bytes, mime) = cover.unwrap();
    let byte_count = bytes.len();
    assert_eq!(byte_count, 5785);
    assert_eq!(mime, "image/jpeg");
}
2527
/// Checks that `get_title` on `epub-2.epub` returns the single title
/// "Minimal EPUB 2.0".
#[test]
fn test_epub_2() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-2.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let title_result = book.get_title();
    assert!(title_result.is_ok());

    let expected = vec!["Minimal EPUB 2.0"];
    assert_eq!(title_result.unwrap(), expected);
}
2540}