1use std::{
24 collections::HashMap,
25 fs::{File, canonicalize},
26 io::{BufReader, Read, Seek},
27 path::{Path, PathBuf},
28 sync::{
29 Arc, Mutex,
30 atomic::{AtomicUsize, Ordering},
31 },
32};
33
34use log::warn;
35use zip::{ZipArchive, result::ZipError};
36
37use crate::{
38 error::EpubError,
39 types::{
40 EncryptionData, EpubVersion, ManifestItem, MetadataItem, MetadataLinkItem,
41 MetadataRefinement, NavPoint, SpineItem,
42 },
43 utils::{
44 DecodeBytes, NormalizeWhitespace, XmlElement, XmlReader, adobe_font_dencryption,
45 check_realtive_link_leakage, compression_method_check, get_file_in_zip_archive,
46 idpf_font_dencryption,
47 },
48};
49
50pub struct EpubDoc<R: Read + Seek> {
76 pub(crate) archive: Arc<Mutex<ZipArchive<R>>>,
78
79 pub(crate) epub_path: PathBuf,
81
82 pub package_path: PathBuf,
84
85 pub base_path: PathBuf,
87
88 pub version: EpubVersion,
90
91 pub unique_identifier: String,
95
96 pub metadata: Vec<MetadataItem>,
98
99 pub metadata_link: Vec<MetadataLinkItem>,
101
102 pub manifest: HashMap<String, ManifestItem>,
107
108 pub spine: Vec<SpineItem>,
113
114 pub encryption: Option<Vec<EncryptionData>>,
116
117 pub catalog: Vec<NavPoint>,
119
120 pub catalog_title: String,
122
123 current_spine_index: AtomicUsize,
125
126 has_encryption: bool,
128}
129
130impl<R: Read + Seek> EpubDoc<R> {
131 pub fn from_reader(reader: R, epub_path: PathBuf) -> Result<Self, EpubError> {
150 let mut archive = ZipArchive::new(reader).map_err(EpubError::from)?;
160 let epub_path = canonicalize(epub_path)?;
161
162 compression_method_check(&mut archive)?;
163
164 let container =
165 get_file_in_zip_archive(&mut archive, "META-INF/container.xml")?.decode()?;
166 let package_path = Self::parse_container(container)?;
167 let base_path = package_path
168 .parent()
169 .expect("所有文件的父目录不能为空")
170 .to_path_buf();
171
172 let opf_file =
173 get_file_in_zip_archive(&mut archive, package_path.to_str().unwrap())?.decode()?;
174 let package = XmlReader::parse(&opf_file)?;
175
176 let version = Self::determine_epub_version(&package)?;
177 let has_encryption = archive
178 .by_path(Path::new("META-INF/encryption.xml"))
179 .is_ok();
180
181 let mut doc = Self {
182 archive: Arc::new(Mutex::new(archive)),
183 epub_path,
184 package_path,
185 base_path,
186 version,
187 unique_identifier: String::new(),
188 metadata: vec![],
189 metadata_link: vec![],
190 manifest: HashMap::new(),
191 spine: vec![],
192 encryption: None,
193 catalog: vec![],
194 catalog_title: String::new(),
195 current_spine_index: AtomicUsize::new(0),
196 has_encryption,
197 };
198
199 let metadata_element = package.find_elements_by_name("metadata").next().unwrap();
200 let manifest_element = package.find_elements_by_name("manifest").next().unwrap();
201 let spine_element = package.find_elements_by_name("spine").next().unwrap();
202
203 doc.parse_metadata(metadata_element)?;
204 doc.parse_manifest(manifest_element)?;
205 doc.parse_spine(spine_element)?;
206 doc.parse_encryption()?;
207 doc.parse_catalog()?;
208
209 doc.unique_identifier = if let Some(uid) = package.get_attr("unique-identifier") {
211 doc.metadata.iter().find(|item| {
212 item.property == "identifier" && item.id.as_ref().is_some_and(|id| id == &uid)
213 })
214 } else {
215 doc.metadata
216 .iter()
217 .find(|item| item.property == "identifier")
218 }
219 .map(|item| item.value.clone())
220 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "dc:identifier".to_string() })?;
221
222 Ok(doc)
223 }
224
225 fn parse_container(data: String) -> Result<PathBuf, EpubError> {
241 let root = XmlReader::parse(&data)?;
242 let rootfile = root
243 .find_elements_by_name("rootfile")
244 .next()
245 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "rootfile".to_string() })?;
246
247 let attr =
248 rootfile
249 .get_attr("full-path")
250 .ok_or_else(|| EpubError::MissingRequiredAttribute {
251 tag: "rootfile".to_string(),
252 attribute: "full-path".to_string(),
253 })?;
254
255 Ok(PathBuf::from(attr))
256 }
257
258 fn parse_metadata(&mut self, metadata_element: &XmlElement) -> Result<(), EpubError> {
269 const DC_NAMESPACE: &str = "http://purl.org/dc/elements/1.1/";
270 const OPF_NAMESPACE: &str = "http://www.idpf.org/2007/opf";
271
272 let mut metadata = Vec::new();
273 let mut metadata_link = Vec::new();
274 let mut refinements = HashMap::<String, Vec<MetadataRefinement>>::new();
275
276 for element in metadata_element.children() {
277 match &element.namespace {
278 Some(namespace) if namespace == DC_NAMESPACE => {
279 self.parse_dc_metadata(element, &mut metadata)?
280 }
281
282 Some(namespace) if namespace == OPF_NAMESPACE => self.parse_opf_metadata(
283 element,
284 &mut metadata,
285 &mut metadata_link,
286 &mut refinements,
287 )?,
288
289 _ => {}
290 };
291 }
292
293 for item in metadata.iter_mut() {
294 if let Some(id) = &item.id {
295 if let Some(refinements) = refinements.remove(id) {
296 item.refined = refinements;
297 }
298 }
299 }
300
301 self.metadata = metadata;
302 self.metadata_link = metadata_link;
303 Ok(())
304 }
305
306 fn parse_manifest(&mut self, manifest_element: &XmlElement) -> Result<(), EpubError> {
316 let estimated_items = manifest_element.children().count();
317 let mut resources = HashMap::with_capacity(estimated_items);
318
319 for element in manifest_element.children() {
320 let id = element
321 .get_attr("id")
322 .ok_or_else(|| EpubError::MissingRequiredAttribute {
323 tag: element.tag_name(),
324 attribute: "id".to_string(),
325 })?
326 .to_string();
327 let path = element
328 .get_attr("href")
329 .ok_or_else(|| EpubError::MissingRequiredAttribute {
330 tag: element.tag_name(),
331 attribute: "href".to_string(),
332 })?
333 .to_string();
334 let mime = element
335 .get_attr("media-type")
336 .ok_or_else(|| EpubError::MissingRequiredAttribute {
337 tag: element.tag_name(),
338 attribute: "media-type".to_string(),
339 })?
340 .to_string();
341 let properties = element.get_attr("properties");
342 let fallback = element.get_attr("fallback");
343
344 resources.insert(
345 id.clone(),
346 ManifestItem {
347 id,
348 path: self.normalize_manifest_path(&path)?,
349 mime,
350 properties,
351 fallback,
352 },
353 );
354 }
355
356 self.manifest = resources;
357 self.validate_fallback_chains();
358 Ok(())
359 }
360
361 fn parse_spine(&mut self, spine_element: &XmlElement) -> Result<(), EpubError> {
371 let mut spine = Vec::new();
372 for element in spine_element.children() {
373 let idref = element
374 .get_attr("idref")
375 .ok_or_else(|| EpubError::MissingRequiredAttribute {
376 tag: element.tag_name(),
377 attribute: "idref".to_string(),
378 })?
379 .to_string();
380 let id = element.get_attr("id");
381 let linear = element
382 .get_attr("linear")
383 .map(|linear| linear == "yes")
384 .unwrap_or(true);
385 let properties = element.get_attr("properties");
386
387 spine.push(SpineItem { idref, id, linear, properties });
388 }
389
390 self.spine = spine;
391 Ok(())
392 }
393
394 fn parse_encryption(&mut self) -> Result<(), EpubError> {
404 if !self.has_encryption() {
405 return Ok(());
406 }
407
408 let mut archive = self.archive.lock()?;
409 let encryption_file =
410 get_file_in_zip_archive(&mut archive, "META-INF/encryption.xml")?.decode()?;
411
412 let root = XmlReader::parse(&encryption_file)?;
413
414 let mut encryption_data = Vec::new();
415 for data in root.children() {
416 if data.name != "EncryptedData" {
417 continue;
418 }
419
420 let method = data
421 .find_elements_by_name("EncryptionMethod")
422 .next()
423 .ok_or_else(|| EpubError::NonCanonicalFile {
424 tag: "EncryptionMethod".to_string(),
425 })?;
426 let reference = data
427 .find_elements_by_name("CipherReference")
428 .next()
429 .ok_or_else(|| EpubError::NonCanonicalFile {
430 tag: "CipherReference".to_string(),
431 })?;
432
433 encryption_data.push(EncryptionData {
434 method: method
435 .get_attr("Algorithm")
436 .ok_or_else(|| EpubError::MissingRequiredAttribute {
437 tag: "EncryptionMethod".to_string(),
438 attribute: "Algorithm".to_string(),
439 })?
440 .to_string(),
441 data: reference
442 .get_attr("URI")
443 .ok_or_else(|| EpubError::MissingRequiredAttribute {
444 tag: "CipherReference".to_string(),
445 attribute: "URI".to_string(),
446 })?
447 .to_string(),
448 });
449 }
450
451 if !encryption_data.is_empty() {
452 self.encryption = Some(encryption_data);
453 }
454
455 Ok(())
456 }
457
458 fn parse_catalog(&mut self) -> Result<(), EpubError> {
465 const HEAD_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];
466
467 let mut archive = self.archive.lock()?;
468 match self.version {
469 EpubVersion::Version2_0 => {
470 let opf_file =
471 get_file_in_zip_archive(&mut archive, self.package_path.to_str().unwrap())?
472 .decode()?;
473 let opf_element = XmlReader::parse(&opf_file)?;
474
475 let toc_id = opf_element
476 .find_children_by_name("spine")
477 .next()
478 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?
479 .get_attr("toc")
480 .ok_or_else(|| EpubError::MissingRequiredAttribute {
481 tag: "spine".to_string(),
482 attribute: "toc".to_string(),
483 })?
484 .to_owned();
485 let toc_path = self
486 .manifest
487 .get(&toc_id)
488 .ok_or(EpubError::ResourceIdNotExist { id: toc_id })?
489 .path
490 .to_str()
491 .unwrap();
492
493 let ncx_file = get_file_in_zip_archive(&mut archive, toc_path)?.decode()?;
494 let ncx = XmlReader::parse(&ncx_file)?;
495
496 match ncx.find_elements_by_name("docTitle").next() {
497 Some(element) => self.catalog_title = element.text(),
498 None => warn!(
499 "Expecting to get docTitle information from the ncx file, but it's missing."
500 ),
501 };
502
503 let nav_map = ncx
504 .find_elements_by_name("navMap")
505 .next()
506 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "navMap".to_string() })?;
507
508 self.catalog = self.parse_nav_points(nav_map)?;
509
510 Ok(())
511 }
512
513 EpubVersion::Version3_0 => {
514 let nav_path = self
515 .manifest
516 .values()
517 .find(|item| {
518 if let Some(property) = &item.properties {
519 return property.contains("nav");
520 }
521 false
522 })
523 .map(|item| item.path.clone())
524 .ok_or_else(|| EpubError::NonCanonicalEpub {
525 expected_file: "Navigation Document".to_string(),
526 })?;
527
528 let nav_file =
529 get_file_in_zip_archive(&mut archive, nav_path.to_str().unwrap())?.decode()?;
530
531 let nav_element = XmlReader::parse(&nav_file)?;
532 let nav = nav_element
533 .find_elements_by_name("nav")
534 .find(|&element| element.get_attr("epub:type") == Some(String::from("toc")))
535 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "nav".to_string() })?;
536 let nav_title = nav.find_children_by_names(&HEAD_TAGS).next();
537 let nav_list = nav
538 .find_children_by_name("ol")
539 .next()
540 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "ol".to_string() })?;
541
542 self.catalog = self.parse_catalog_list(nav_list)?;
543 if let Some(nav_title) = nav_title {
544 self.catalog_title = nav_title.text();
545 };
546 Ok(())
547 }
548 }
549 }
550
551 pub fn has_encryption(&self) -> bool {
567 self.has_encryption
568 }
569
570 pub fn get_metadata(&self, key: &str) -> Option<Vec<MetadataItem>> {
584 let metadatas = self
585 .metadata
586 .iter()
587 .filter(|item| item.property == key)
588 .cloned()
589 .collect::<Vec<MetadataItem>>();
590
591 (!metadatas.is_empty()).then_some(metadatas)
592 }
593
594 pub fn get_metadata_value(&self, key: &str) -> Option<Vec<String>> {
606 let values = self
607 .metadata
608 .iter()
609 .filter(|item| item.property == key)
610 .map(|item| item.value.clone())
611 .collect::<Vec<String>>();
612
613 (!values.is_empty()).then_some(values)
614 }
615
616 pub fn get_title(&self) -> Result<Vec<String>, EpubError> {
629 self.get_metadata_value("title")
630 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "title".to_string() })
631 }
632
633 pub fn get_language(&self) -> Result<Vec<String>, EpubError> {
647 self.get_metadata_value("language")
648 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "language".to_string() })
649 }
650
651 pub fn get_identifier(&self) -> Result<Vec<String>, EpubError> {
667 self.get_metadata_value("identifier")
668 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "identifier".to_string() })
669 }
670
671 pub fn get_manifest_item(&self, id: &str) -> Result<(Vec<u8>, String), EpubError> {
688 let resource_item = self
689 .manifest
690 .get(id)
691 .cloned()
692 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
693
694 let path = resource_item.path.to_str().unwrap();
695
696 let mut archive = self.archive.lock()?;
697 let mut data = match archive.by_name(path) {
698 Ok(mut file) => {
699 let mut entry = Vec::<u8>::new();
700 file.read_to_end(&mut entry)?;
701
702 Ok(entry)
703 }
704 Err(ZipError::FileNotFound) => {
705 Err(EpubError::ResourceNotFound { resource: path.to_string() })
706 }
707 Err(err) => Err(EpubError::from(err)),
708 }?;
709
710 if let Some(method) = self.is_encryption_file(path) {
711 data = self.auto_dencrypt(&method, &mut data)?;
712 }
713
714 Ok((data, resource_item.mime))
715 }
716
717 pub fn get_manifest_item_by_path(&self, path: &str) -> Result<(Vec<u8>, String), EpubError> {
736 let id = self
737 .manifest
738 .iter()
739 .find(|(_, item)| item.path.to_str().unwrap() == path)
740 .map(|(id, _)| id.to_string())
741 .ok_or_else(|| EpubError::ResourceNotFound { resource: path.to_string() })?;
742
743 self.get_manifest_item(&id)
744 }
745
746 pub fn get_manifest_item_with_fallback(
762 &self,
763 id: &str,
764 supported_format: Vec<&str>,
765 ) -> Result<(Vec<u8>, String), EpubError> {
766 let mut manifest_item = self
767 .manifest
768 .get(id)
769 .cloned()
770 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
771
772 let mut current_manifest_id = id.to_string();
773 let mut fallback_chain = Vec::<String>::new();
774 'fallback: loop {
775 if supported_format.contains(&manifest_item.mime.as_str()) {
776 return self.get_manifest_item(¤t_manifest_id);
777 }
778
779 let fallback_id = manifest_item.fallback.clone();
780
781 match fallback_id {
782 None => break 'fallback,
784
785 Some(id) if fallback_chain.contains(&id) => break 'fallback,
787
788 Some(id) => {
789 fallback_chain.push(id.clone());
790
791 manifest_item = self
795 .manifest
796 .get(&manifest_item.fallback.unwrap())
797 .cloned()
798 .ok_or(EpubError::ResourceIdNotExist { id: id.clone() })?;
799 current_manifest_id = id;
800 }
801 };
802 }
803
804 Err(EpubError::NoSupportedFileFormat)
805 }
806
807 pub fn get_cover(&self) -> Option<(Vec<u8>, String)> {
824 self.manifest
825 .values()
826 .filter_map(|manifest| {
827 if manifest.id.to_ascii_lowercase().contains("cover") {
828 return Some(manifest.id.clone());
829 }
830
831 if let Some(properties) = &manifest.properties {
832 if properties.to_ascii_lowercase().contains("cover") {
833 return Some(manifest.id.clone());
834 }
835 }
836
837 None
838 })
839 .collect::<Vec<String>>()
840 .iter()
841 .find_map(|id| self.get_manifest_item(id).ok())
842 }
843
844 pub fn navigate_by_spine_index(&mut self, index: usize) -> Option<(Vec<u8>, String)> {
863 if index >= self.spine.len() {
864 return None;
865 }
866
867 let manifest_id = self.spine[index].idref.clone();
868 self.current_spine_index.store(index, Ordering::SeqCst);
869 self.get_manifest_item(&manifest_id).ok()
870 }
871
872 pub fn spine_prev(&self) -> Option<(Vec<u8>, String)> {
884 let current_index = self.current_spine_index.load(Ordering::SeqCst);
885 if current_index == 0 || !self.spine[current_index].linear {
886 return None;
887 }
888
889 let prev_index = (0..current_index)
890 .rev()
891 .find(|&index| self.spine[index].linear)?;
892
893 self.current_spine_index.store(prev_index, Ordering::SeqCst);
894 let manifest_id = self.spine[prev_index].idref.clone();
895 self.get_manifest_item(&manifest_id).ok()
896 }
897
898 pub fn spine_next(&mut self) -> Option<(Vec<u8>, String)> {
910 let current_index = self.current_spine_index.load(Ordering::SeqCst);
911 if current_index >= self.spine.len() - 1 || !self.spine[current_index].linear {
912 return None;
913 }
914
915 let next_index =
916 (current_index + 1..self.spine.len()).find(|&index| self.spine[index].linear)?;
917
918 self.current_spine_index.store(next_index, Ordering::SeqCst);
919 let manifest_id = self.spine[next_index].idref.clone();
920 self.get_manifest_item(&manifest_id).ok()
921 }
922
923 pub fn spine_current(&self) -> Option<(Vec<u8>, String)> {
933 let manifest_id = self.spine[self.current_spine_index.load(Ordering::SeqCst)]
934 .idref
935 .clone();
936 self.get_manifest_item(&manifest_id).ok()
937 }
938
939 fn determine_epub_version(opf_element: &XmlElement) -> Result<EpubVersion, EpubError> {
949 if let Some(version) = opf_element.get_attr("version") {
951 match version.as_str() {
952 "2.0" => return Ok(EpubVersion::Version2_0),
953 "3.0" => return Ok(EpubVersion::Version3_0),
954 _ => {}
955 }
956 }
957
958 let spine_element = opf_element
959 .find_elements_by_name("spine")
960 .next()
961 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?;
962
963 if spine_element.get_attr("toc").is_some() {
965 return Ok(EpubVersion::Version2_0);
966 }
967
968 let manifest_element = opf_element
969 .find_elements_by_name("manifest")
970 .next()
971 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "manifest".to_string() })?;
972
973 manifest_element
975 .children()
976 .find_map(|element| {
977 if let Some(id) = element.get_attr("id") {
978 if id.eq("nav") {
979 return Some(EpubVersion::Version3_0);
980 }
981 }
982
983 None
984 })
985 .ok_or(EpubError::UnrecognizedEpubVersion)
986 }
987
988 #[inline]
998 fn parse_dc_metadata(
999 &self,
1000 element: &XmlElement,
1001 metadata: &mut Vec<MetadataItem>,
1002 ) -> Result<(), EpubError> {
1004 let id = element.get_attr("id");
1005 let lang = element.get_attr("lang");
1006 let property = element.name.clone();
1007 let value = element.text().normalize_whitespace();
1008
1009 let refined = match self.version {
1010 EpubVersion::Version2_0 => element
1013 .attributes
1014 .iter()
1015 .map(|(name, value)| {
1016 let property = name.to_string();
1017 let value = value.to_string().normalize_whitespace();
1018
1019 MetadataRefinement {
1020 refines: id.clone().unwrap(),
1021 property,
1022 value,
1023 lang: None,
1024 scheme: None,
1025 }
1026 })
1027 .collect(),
1028 EpubVersion::Version3_0 => vec![],
1029 };
1030
1031 metadata.push(MetadataItem { id, property, value, lang, refined });
1032
1033 Ok(())
1034 }
1035
1036 #[inline]
1047 fn parse_opf_metadata(
1048 &self,
1049 element: &XmlElement,
1050 metadata: &mut Vec<MetadataItem>,
1051 metadata_link: &mut Vec<MetadataLinkItem>,
1052 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1053 ) -> Result<(), EpubError> {
1054 match element.name.as_str() {
1055 "meta" => self.parse_meta_element(element, metadata, refinements),
1056 "link" => self.parse_link_element(element, metadata_link),
1057 _ => Ok(()),
1058 }
1059 }
1060
1061 #[inline]
1062 fn parse_meta_element(
1063 &self,
1064 element: &XmlElement,
1065 metadata: &mut Vec<MetadataItem>,
1066 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1067 ) -> Result<(), EpubError> {
1068 match self.version {
1069 EpubVersion::Version2_0 => {
1070 let property = element
1071 .get_attr("name")
1072 .ok_or_else(|| EpubError::NonCanonicalFile { tag: element.tag_name() })?;
1073 let value = element
1074 .get_attr("content")
1075 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1076 tag: element.tag_name(),
1077 attribute: "content".to_string(),
1078 })?
1079 .normalize_whitespace();
1080
1081 metadata.push(MetadataItem {
1082 id: None,
1083 property,
1084 value,
1085 lang: None,
1086 refined: vec![],
1087 });
1088 }
1089
1090 EpubVersion::Version3_0 => {
1091 let property = element.get_attr("property").ok_or_else(|| {
1092 EpubError::MissingRequiredAttribute {
1093 tag: element.tag_name(),
1094 attribute: "property".to_string(),
1095 }
1096 })?;
1097 let value = element.text().normalize_whitespace();
1098 let lang = element.get_attr("lang");
1099
1100 if let Some(refines) = element.get_attr("refines") {
1101 let id = refines.strip_prefix("#").unwrap_or(&refines).to_string();
1102 let scheme = element.get_attr("scheme");
1103 let refinement = MetadataRefinement {
1104 refines: id.clone(),
1105 property,
1106 value,
1107 lang,
1108 scheme,
1109 };
1110
1111 if let Some(refinements) = refinements.get_mut(&id) {
1112 refinements.push(refinement);
1113 } else {
1114 refinements.insert(id, vec![refinement]);
1115 }
1116 } else {
1117 let id = element.get_attr("id");
1118 let item = MetadataItem {
1119 id,
1120 property,
1121 value,
1122 lang,
1123 refined: vec![],
1124 };
1125
1126 metadata.push(item);
1127 };
1128 }
1129 }
1130 Ok(())
1131 }
1132
1133 #[inline]
1134 fn parse_link_element(
1135 &self,
1136 element: &XmlElement,
1137 metadata_link: &mut Vec<MetadataLinkItem>,
1138 ) -> Result<(), EpubError> {
1139 let href = element
1140 .get_attr("href")
1141 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1142 tag: element.tag_name(),
1143 attribute: "href".to_string(),
1144 })?;
1145 let rel = element
1146 .get_attr("rel")
1147 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1148 tag: element.tag_name(),
1149 attribute: "rel".to_string(),
1150 })?;
1151 let hreflang = element.get_attr("hreflang");
1152 let id = element.get_attr("id");
1153 let mime = element.get_attr("media-type");
1154 let properties = element.get_attr("properties");
1155
1156 metadata_link.push(MetadataLinkItem {
1157 href,
1158 rel,
1159 hreflang,
1160 id,
1161 mime,
1162 properties,
1163 refines: None,
1164 });
1165 Ok(())
1166 }
1167
1168 fn parse_nav_points(&self, parent_element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1174 let mut nav_points = Vec::new();
1175 for nav_point in parent_element.find_children_by_name("navPoint") {
1176 let label = match nav_point.find_children_by_name("navLabel").next() {
1177 Some(element) => element.text(),
1178 None => String::new(),
1179 };
1180
1181 let content = nav_point
1182 .find_children_by_name("content")
1183 .next()
1184 .map(|element| PathBuf::from(element.text()));
1185
1186 let play_order = nav_point
1187 .get_attr("playOrder")
1188 .and_then(|order| order.parse::<usize>().ok());
1189
1190 let children = self.parse_nav_points(nav_point)?;
1191
1192 nav_points.push(NavPoint { label, content, play_order, children });
1193 }
1194
1195 nav_points.sort();
1196 Ok(nav_points)
1197 }
1198
1199 fn parse_catalog_list(&self, element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1205 let mut catalog = Vec::new();
1206 for item in element.children() {
1207 if item.tag_name() != "li" {
1208 return Err(EpubError::NonCanonicalFile { tag: "li".to_string() });
1209 }
1210
1211 let title_element = item
1212 .find_children_by_names(&["span", "a"])
1213 .next()
1214 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "span/a".to_string() })?;
1215 let content_href = title_element.get_attr("href").map(PathBuf::from);
1216 let sub_list = if let Some(list) = item.find_children_by_name("ol").next() {
1217 self.parse_catalog_list(list)?
1218 } else {
1219 vec![]
1220 };
1221
1222 catalog.push(NavPoint {
1223 label: title_element.text(),
1224 content: content_href,
1225 children: sub_list,
1226 play_order: None,
1227 });
1228 }
1229
1230 Ok(catalog)
1231 }
1232
1233 #[inline]
1250 fn normalize_manifest_path(&self, path: &str) -> Result<PathBuf, EpubError> {
1251 let mut path = if path.starts_with("../") {
1252 let mut current_dir = self.epub_path.join(&self.package_path);
1253 current_dir.pop();
1254
1255 check_realtive_link_leakage(self.epub_path.clone(), current_dir, path)
1256 .map(PathBuf::from)
1257 .ok_or_else(|| EpubError::RealtiveLinkLeakage { path: path.to_string() })?
1258 } else if let Some(path) = path.strip_prefix("/") {
1259 PathBuf::from(path.to_string())
1260 } else {
1261 self.base_path.join(path)
1262 };
1263
1264 #[cfg(windows)]
1265 {
1266 path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
1267 }
1268
1269 Ok(path)
1270 }
1271
1272 fn validate_fallback_chains(&self) {
1283 for (id, item) in &self.manifest {
1284 if item.fallback.is_none() {
1285 continue;
1286 }
1287
1288 let mut fallback_chain = Vec::new();
1289 if let Err(msg) = self.validate_fallback_chain(id, &mut fallback_chain) {
1290 warn!("Invalid fallback chain for item {}: {}", id, msg);
1291 }
1292 }
1293 }
1294
1295 fn validate_fallback_chain(
1309 &self,
1310 manifest_id: &str,
1311 fallback_chain: &mut Vec<String>,
1312 ) -> Result<(), String> {
1313 if fallback_chain.contains(&manifest_id.to_string()) {
1314 fallback_chain.push(manifest_id.to_string());
1315
1316 return Err(format!(
1317 "Circular reference detected in fallback chain for {}",
1318 fallback_chain.join("->")
1319 ));
1320 }
1321
1322 let item = self.manifest.get(manifest_id).unwrap();
1324
1325 if let Some(fallback_id) = &item.fallback {
1326 if !self.manifest.contains_key(fallback_id) {
1327 return Err(format!(
1328 "Fallback resource {} does not exist in manifest",
1329 fallback_id
1330 ));
1331 }
1332
1333 fallback_chain.push(manifest_id.to_string());
1334 self.validate_fallback_chain(fallback_id, fallback_chain)
1335 } else {
1336 Ok(())
1338 }
1339 }
1340
1341 fn is_encryption_file(&self, path: &str) -> Option<String> {
1354 self.encryption.as_ref().and_then(|encryptions| {
1355 encryptions
1356 .iter()
1357 .find(|encryption| encryption.data == path)
1358 .map(|encryption| encryption.method.clone())
1359 })
1360 }
1361
1362 #[inline]
1380 fn auto_dencrypt(&self, method: &str, data: &mut [u8]) -> Result<Vec<u8>, EpubError> {
1381 match method {
1382 "http://www.idpf.org/2008/embedding" => {
1383 Ok(idpf_font_dencryption(data, &self.unique_identifier))
1384 }
1385 "http://ns.adobe.com/pdf/enc#RC" => {
1386 Ok(adobe_font_dencryption(data, &self.unique_identifier))
1387 }
1388 _ => Err(EpubError::UnsupportedEncryptedMethod { method: method.to_string() }),
1389 }
1390 }
1391}
1392
1393impl EpubDoc<BufReader<File>> {
1394 pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
1406 let file = File::open(&path).map_err(EpubError::from)?;
1407 let path = canonicalize(path)?;
1408
1409 Self::from_reader(BufReader::new(file), path)
1410 }
1411}
1412
1413#[cfg(test)]
1414mod tests {
1415 use std::{
1416 fs::File,
1417 io::BufReader,
1418 path::{Path, PathBuf},
1419 };
1420
1421 use crate::{epub::EpubDoc, error::EpubError, utils::XmlReader};
1422
1423 mod package_documents_tests {
1425 use std::{path::Path, sync::atomic::Ordering};
1426
1427 use crate::epub::{EpubDoc, EpubVersion};
1428
/// The `pkg-collections-unknown` fixture must open without error.
#[test]
fn test_pkg_collections_unknown() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-collections-unknown.epub"));
    assert!(opened.is_ok());
}
1438
/// Creator values must be returned in the order listed by the fixture.
#[test]
fn test_pkg_creator_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-creator-order.epub"));
    assert!(opened.is_ok());

    let creators = opened.unwrap().get_metadata_value("creator");
    assert!(creators.is_some());

    let creators = creators.unwrap();
    let expected = vec![
        "Dave Cramer",
        "Wendy Reid",
        "Dan Lazin",
        "Ivan Herman",
        "Brady Duga",
    ];
    assert_eq!(creators.len(), 5);
    assert_eq!(creators, expected);
}
1465
/// The `pkg-manifest-unknown` fixture exposes exactly two manifest items,
/// `nav` and `content_001`.
#[test]
fn test_pkg_manifest_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-manifest-unknown.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.manifest.len(), 2);

    for present in ["nav", "content_001"].iter() {
        assert!(book.get_manifest_item(present).is_ok());
    }
    assert!(book.get_manifest_item("content_002").is_err());
}
1481
/// Checks which `dcterms:*` properties the `pkg-meta-unknown` fixture yields.
#[test]
fn test_pkg_meta_unknown() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-meta-unknown.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let referenced_by = book.get_metadata_value("dcterms:isReferencedBy");
    assert!(referenced_by.is_some());
    let referenced_by = referenced_by.unwrap();
    assert_eq!(referenced_by.len(), 1);
    assert_eq!(
        referenced_by,
        vec!["https://www.w3.org/TR/epub-rs/#confreq-rs-pkg-meta-unknown"]
    );

    let modified = book.get_metadata_value("dcterms:modified");
    assert!(modified.is_some());
    let modified = modified.unwrap();
    assert_eq!(modified.len(), 1);
    assert_eq!(modified, vec!["2021-01-11T00:00:00Z"]);

    assert!(book.get_metadata_value("dcterms:title").is_none());
}
1510
/// Metadata values must come back with whitespace normalized.
#[test]
fn test_pkg_meta_white_space() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-meta-whitespace.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let creator = book.get_metadata_value("creator");
    assert!(creator.is_some());
    let creator = creator.unwrap();
    assert_eq!(creator.len(), 1);
    assert_eq!(creator, vec!["Dave Cramer"]);

    let description = book.get_metadata_value("description");
    assert!(description.is_some());
    let description = description.unwrap();
    assert_eq!(description.len(), 1);
    assert_eq!(
        description,
        vec![
            "The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases."
        ]
    );
}
1538
/// Each spine index must resolve to the manifest item the fixture assigns to it.
#[test]
fn test_pkg_spine_duplicate_item_hyperlink() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-duplicate-item-hyperlink.epub"));
    assert!(opened.is_ok());

    let mut book = opened.unwrap();
    assert_eq!(book.spine.len(), 4);

    let expected_ids = ["content_001", "content_002", "content_002", "content_002"];
    for (index, id) in expected_ids.iter().enumerate() {
        assert_eq!(
            book.navigate_by_spine_index(index).unwrap(),
            book.get_manifest_item(id).unwrap()
        );
    }
}
1567
/// Stepping through a four-entry spine: no previous entry at the start, and no
/// next entry after three forward steps.
#[test]
fn test_pkg_spine_duplicate_item_rendering() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-duplicate-item-rendering.epub"));
    assert!(opened.is_ok());

    let mut book = opened.unwrap();
    assert_eq!(book.spine.len(), 4);

    assert!(book.spine_prev().is_none());
    assert!(book.spine_next().is_some());

    // Two more steps reach the last entry.
    for _ in 0..2 {
        book.spine_next();
    }
    assert!(book.spine_next().is_none());
}
1591
/// In this fixture neither `spine_prev` nor `spine_next` moves, both at the
/// start and after jumping to index 1.
#[test]
fn test_pkg_spine_nonlinear_activation() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-nonlinear-activation.epub"));
    assert!(opened.is_ok());
    let mut book = opened.unwrap();

    assert!(book.spine_prev().is_none());
    assert!(book.spine_next().is_none());

    assert!(book.navigate_by_spine_index(1).is_some());

    assert!(book.spine_prev().is_none());
    assert!(book.spine_next().is_none());
}
1609
/// Spine entries must keep document order rather than being sorted by id.
#[test]
fn test_pkg_spine_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-order.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.spine.len(), 4);

    let mut idrefs = Vec::<String>::new();
    for entry in &book.spine {
        idrefs.push(entry.idref.clone());
    }

    assert_eq!(
        idrefs,
        vec![
            "d-content_001",
            "c-content_002",
            "b-content_003",
            "a-content_004",
        ]
    );
}
1634
/// Walking forward with `spine_next` must return the same data as fetching the
/// current entry by id, and must stop at index 3.
#[test]
fn test_spine_order_svg() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-order-svg.epub"));
    assert!(opened.is_ok());

    let mut book = opened.unwrap();
    assert_eq!(book.spine.len(), 4);

    while let Some(stepped) = book.spine_next() {
        let position = book.current_spine_index.load(Ordering::Relaxed);
        let idref = book.spine[position].idref.clone();

        let fetched = book.get_manifest_item(&idref);
        assert!(fetched.is_ok());
        assert_eq!(stepped, fetched.unwrap());
    }

    assert_eq!(book.current_spine_index.load(Ordering::Relaxed), 3);
}
1664
1665 #[test]
1669 fn test_pkg_spine_unknown() {
1670 let epub_file = Path::new("./test_case/pkg-spine-unknown.epub");
1671 let doc = EpubDoc::new(epub_file);
1672 assert!(doc.is_ok());
1673
1674 let doc = doc.unwrap();
1675 assert_eq!(doc.spine.len(), 1);
1676 assert_eq!(doc.spine[0].idref, "content_001");
1677 assert_eq!(doc.spine[0].id, None);
1678 assert_eq!(doc.spine[0].linear, true);
1679 assert_eq!(doc.spine[0].properties, Some("untrustworthy".to_string()));
1680 }
1681
1682 #[test]
1686 fn test_pkg_title_order() {
1687 let epub_file = Path::new("./test_case/pkg-title-order.epub");
1688 let doc = EpubDoc::new(epub_file);
1689 assert!(doc.is_ok());
1690
1691 let doc = doc.unwrap();
1692 let title_list = doc.get_title();
1693 assert!(title_list.is_ok());
1694
1695 let title_list = title_list.unwrap();
1696 assert_eq!(title_list.len(), 6);
1697 assert_eq!(
1698 title_list,
1699 vec![
1700 "pkg-title-order",
1701 "This title must not display first",
1702 "Also, this title must not display first",
1703 "This title also must not display first",
1704 "This title must also not display first",
1705 "This title must not display first, also",
1706 ]
1707 );
1708 }
1709
1710 #[test]
1714 fn test_pkg_unique_id() {
1715 let epub_file = Path::new("./test_case/pkg-unique-id.epub");
1716 let doc_1 = EpubDoc::new(epub_file);
1717 assert!(doc_1.is_ok());
1718
1719 let epub_file = Path::new("./test_case/pkg-unique-id_duplicate.epub");
1720 let doc_2 = EpubDoc::new(epub_file);
1721 assert!(doc_2.is_ok());
1722
1723 let doc_1 = doc_1.unwrap();
1724 let doc_2 = doc_2.unwrap();
1725
1726 assert_eq!(
1727 doc_1.get_identifier().unwrap(),
1728 doc_2.get_identifier().unwrap()
1729 );
1730 assert_eq!(doc_1.unique_identifier, "pkg-unique-id");
1731 assert_eq!(doc_2.unique_identifier, "pkg-unique-id");
1732 }
1733
1734 #[test]
1738 fn test_pkg_version_backward() {
1739 let epub_file = Path::new("./test_case/pkg-version-backward.epub");
1740 let doc = EpubDoc::new(epub_file);
1741 assert!(doc.is_ok());
1742
1743 let doc = doc.unwrap();
1744 assert_eq!(doc.version, EpubVersion::Version3_0);
1745 }
1746
1747 #[test]
1751 fn test_pkg_linked_records() {
1752 let epub_file = Path::new("./test_case/pkg-linked-records.epub");
1753 let doc = EpubDoc::new(epub_file);
1754 assert!(doc.is_ok());
1755
1756 let doc = doc.unwrap();
1757 assert_eq!(doc.metadata_link.len(), 3);
1758
1759 let item = doc.metadata_link.iter().find(|&item| {
1760 if let Some(properties) = &item.properties {
1761 properties.eq("onix")
1762 } else {
1763 false
1764 }
1765 });
1766 assert!(item.is_some());
1767 }
1768
1769 #[test]
1773 fn test_pkg_manifest_unlisted_resource() {
1774 let epub_file = Path::new("./test_case/pkg-manifest-unlisted-resource.epub");
1775 let doc = EpubDoc::new(epub_file);
1776 assert!(doc.is_ok());
1777
1778 let doc = doc.unwrap();
1779 assert!(
1780 doc.get_manifest_item_by_path("EPUB/content_001.xhtml")
1781 .is_ok()
1782 );
1783
1784 assert!(doc.get_manifest_item_by_path("EPUB/red.png").is_err());
1785 let err = doc.get_manifest_item_by_path("EPUB/red.png").unwrap_err();
1786 assert_eq!(
1787 err.to_string(),
1788 "Resource not found: Unable to find resource from \"EPUB/red.png\"."
1789 );
1790 }
1791 }
1792
1793 mod manifest_fallbacks_tests {
1797 use std::path::Path;
1798
1799 use crate::epub::EpubDoc;
1800
1801 #[test]
1805 fn test_pub_foreign_bad_fallback() {
1806 let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
1807 let doc = EpubDoc::new(epub_file);
1808 assert!(doc.is_ok());
1809
1810 let doc = doc.unwrap();
1811 assert!(doc.get_manifest_item("content_001").is_ok());
1812 assert!(doc.get_manifest_item("bar").is_ok());
1813
1814 assert_eq!(
1815 doc.get_manifest_item_with_fallback("content_001", vec!["application/xhtml+xml"])
1816 .unwrap_err()
1817 .to_string(),
1818 "No supported file format: The fallback resource does not contain the file format you support."
1819 );
1820 }
1821
1822 #[test]
1826 fn test_pub_foreign_image() {
1827 let epub_file = Path::new("./test_case/pub-foreign_image.epub");
1828 let doc = EpubDoc::new(epub_file);
1829 assert!(doc.is_ok());
1830
1831 let doc = doc.unwrap();
1832 let result = doc.get_manifest_item_with_fallback(
1833 "image-tiff",
1834 vec!["image/png", "application/xhtml+xml"],
1835 );
1836 assert!(result.is_ok());
1837
1838 let (_, mime) = result.unwrap();
1839 assert_eq!(mime, "image/png");
1840 }
1841
1842 #[test]
1846 fn test_pub_foreign_json_spine() {
1847 let epub_file = Path::new("./test_case/pub-foreign_json-spine.epub");
1848 let doc = EpubDoc::new(epub_file);
1849 assert!(doc.is_ok());
1850
1851 let doc = doc.unwrap();
1852 let result = doc.get_manifest_item_with_fallback(
1853 "content_primary",
1854 vec!["application/xhtml+xml", "application/json"],
1855 );
1856 assert!(result.is_ok());
1857 let (_, mime) = result.unwrap();
1858 assert_eq!(mime, "application/json");
1859
1860 let result = doc
1861 .get_manifest_item_with_fallback("content_primary", vec!["application/xhtml+xml"]);
1862 assert!(result.is_ok());
1863 let (_, mime) = result.unwrap();
1864 assert_eq!(mime, "application/xhtml+xml");
1865 }
1866
1867 #[test]
1871 fn test_pub_foreign_xml_spine() {
1872 let epub_file = Path::new("./test_case/pub-foreign_xml-spine.epub");
1873 let doc = EpubDoc::new(epub_file);
1874 assert!(doc.is_ok());
1875
1876 let doc = doc.unwrap();
1877 let result = doc.get_manifest_item_with_fallback(
1878 "content_primary",
1879 vec!["application/xhtml+xml", "application/xml"],
1880 );
1881 assert!(result.is_ok());
1882 let (_, mime) = result.unwrap();
1883 assert_eq!(mime, "application/xml");
1884
1885 let result = doc
1886 .get_manifest_item_with_fallback("content_primary", vec!["application/xhtml+xml"]);
1887 assert!(result.is_ok());
1888 let (_, mime) = result.unwrap();
1889 assert_eq!(mime, "application/xhtml+xml");
1890 }
1891
1892 #[test]
1896 fn test_pub_foreign_xml_suffix_spine() {
1897 let epub_file = Path::new("./test_case/pub-foreign_xml-suffix-spine.epub");
1898 let doc = EpubDoc::new(epub_file);
1899 assert!(doc.is_ok());
1900
1901 let doc = doc.unwrap();
1902 let result = doc.get_manifest_item_with_fallback(
1903 "content_primary",
1904 vec!["application/xhtml+xml", "application/dtc+xml"],
1905 );
1906 assert!(result.is_ok());
1907 let (_, mime) = result.unwrap();
1908 assert_eq!(mime, "application/dtc+xml");
1909
1910 let result = doc
1911 .get_manifest_item_with_fallback("content_primary", vec!["application/xhtml+xml"]);
1912 assert!(result.is_ok());
1913 let (_, mime) = result.unwrap();
1914 assert_eq!(mime, "application/xhtml+xml");
1915 }
1916 }
1917
1918 mod open_container_format_tests {
1920 use std::{cmp::min, io::Read, path::Path};
1921
1922 use sha1::{Digest, Sha1};
1923
1924 use crate::epub::EpubDoc;
1925
1926 #[test]
1930 fn test_ocf_metainf_inc() {
1931 let epub_file = Path::new("./test_case/ocf-metainf-inc.epub");
1932 let doc = EpubDoc::new(epub_file);
1933 assert!(doc.is_ok());
1934 }
1935
1936 #[test]
1940 fn test_ocf_metainf_manifest() {
1941 let epub_file = Path::new("./test_case/ocf-metainf-manifest.epub");
1942 let doc = EpubDoc::new(epub_file);
1943 assert!(doc.is_ok());
1944 }
1945
1946 #[test]
1950 fn test_ocf_package_arbitrary() {
1951 let epub_file = Path::new("./test_case/ocf-package_arbitrary.epub");
1952 let doc = EpubDoc::new(epub_file);
1953 assert!(doc.is_ok());
1954
1955 let doc = doc.unwrap();
1956 assert_eq!(doc.package_path, Path::new("FOO/BAR/package.opf"));
1957 }
1958
1959 #[test]
1963 fn test_ocf_package_multiple() {
1964 let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
1965 let doc = EpubDoc::new(epub_file);
1966 assert!(doc.is_ok());
1967
1968 let doc = doc.unwrap();
1969 assert_eq!(doc.package_path, Path::new("FOO/BAR/package.opf"));
1970 assert_eq!(doc.base_path, Path::new("FOO/BAR"));
1971 }
1972
1973 #[test]
1977 fn test_ocf_url_link_leaking_relative() {
1978 let epub_file = Path::new("./test_case/ocf-url_link-leaking-relative.epub");
1979 let doc = EpubDoc::new(epub_file);
1980 assert!(doc.is_err());
1981 assert_eq!(
1982 doc.err().unwrap().to_string(),
1983 String::from(
1984 "Relative link leakage: Path \"../../../../media/imgs/monastery.jpg\" is out of container range."
1985 )
1986 )
1987 }
1988
1989 #[test]
1993 fn test_ocf_url_link_path_absolute() {
1994 let epub_file = Path::new("./test_case/ocf-url_link-path-absolute.epub");
1995 let doc = EpubDoc::new(epub_file);
1996 assert!(doc.is_ok());
1997
1998 let doc = doc.unwrap();
1999 let resource = doc.manifest.get("photo").unwrap();
2000 assert_eq!(resource.path, Path::new("media/imgs/monastery.jpg"));
2001 }
2002
2003 #[test]
2007 fn test_ocf_url_link_relative() {
2008 let epub_file = Path::new("./test_case/ocf-url_link-relative.epub");
2009 let doc = EpubDoc::new(epub_file);
2010 assert!(doc.is_ok());
2011
2012 let doc = doc.unwrap();
2013 let resource = doc.manifest.get("photo").unwrap();
2014 assert_eq!(resource.path, Path::new("media/imgs/monastery.jpg"));
2015 }
2016
2017 #[test]
2021 fn test_ocf_url_manifest() {
2022 let epub_file = Path::new("./test_case/ocf-url_manifest.epub");
2023 let doc = EpubDoc::new(epub_file);
2024 assert!(doc.is_ok());
2025
2026 let doc = doc.unwrap();
2027 assert!(doc.get_manifest_item("nav").is_ok());
2028 assert!(doc.get_manifest_item("content_001").is_ok());
2029 assert!(doc.get_manifest_item("content_002").is_err());
2030 }
2031
2032 #[test]
2036 fn test_ocf_url_relative() {
2037 let epub_file = Path::new("./test_case/ocf-url_relative.epub");
2038 let doc = EpubDoc::new(epub_file);
2039 assert!(doc.is_ok());
2040
2041 let doc = doc.unwrap();
2042 assert_eq!(doc.package_path, Path::new("foo/BAR/baz.opf"));
2043 assert_eq!(doc.base_path, Path::new("foo/BAR"));
2044 assert_eq!(
2045 doc.manifest.get("nav").unwrap().path,
2046 Path::new("foo/BAR/nav.xhtml")
2047 );
2048 assert_eq!(
2049 doc.manifest.get("content_001").unwrap().path,
2050 Path::new("foo/BAR/qux/content_001.xhtml")
2051 );
2052 assert!(doc.get_manifest_item("nav").is_ok());
2053 assert!(doc.get_manifest_item("content_001").is_ok());
2054 }
2055
2056 #[test]
2061 fn test_ocf_zip_comp() {
2062 let epub_file = Path::new("./test_case/ocf-zip-comp.epub");
2063 let doc = EpubDoc::new(epub_file);
2064 assert!(doc.is_ok());
2065 }
2066
2067 #[test]
2072 fn test_ocf_zip_mult() {
2073 let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2074 let doc = EpubDoc::new(epub_file);
2075 assert!(doc.is_ok());
2076 }
2077
2078 #[test]
2082 fn test_ocf_font_obfuscation() {
2083 let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2084 let doc = EpubDoc::new(epub_file);
2085 assert!(doc.is_ok());
2086
2087 let doc = doc.unwrap();
2088 let unique_id = doc.unique_identifier.clone();
2089
2090 let mut hasher = Sha1::new();
2091 hasher.update(unique_id.as_bytes());
2092 let hash = hasher.finalize();
2093 let mut key = vec![0u8; 1040];
2094 for i in 0..1040 {
2095 key[i] = hash[i % hash.len()];
2096 }
2097
2098 assert!(doc.encryption.is_some());
2099 assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2100
2101 let data = &doc.encryption.unwrap()[0];
2102 assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2103
2104 let font_file = doc
2105 .archive
2106 .lock()
2107 .unwrap()
2108 .by_name(&data.data)
2109 .unwrap()
2110 .bytes()
2111 .collect::<Result<Vec<u8>, _>>();
2112 assert!(font_file.is_ok());
2113 let font_file = font_file.unwrap();
2114
2115 let mut deobfuscated = font_file.clone();
2117 for i in 0..min(1040, deobfuscated.len()) {
2118 deobfuscated[i] ^= key[i];
2119 }
2120
2121 assert!(is_valid_font(&deobfuscated));
2122 }
2123
2124 #[test]
2128 fn test_ocf_font_obfuscation_bis() {
2129 let epub_file = Path::new("./test_case/ocf-font_obfuscation_bis.epub");
2130 let doc = EpubDoc::new(epub_file);
2131 assert!(doc.is_ok());
2132
2133 let doc = doc.unwrap();
2134
2135 let wrong_unique_id = "wrong-publication-id";
2136 let mut hasher = Sha1::new();
2137 hasher.update(wrong_unique_id.as_bytes());
2138 let hash = hasher.finalize();
2139 let mut wrong_key = vec![0u8; 1040];
2140 for i in 0..1040 {
2141 wrong_key[i] = hash[i % hash.len()];
2142 }
2143
2144 assert!(doc.encryption.is_some());
2145 assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);
2146
2147 let data = &doc.encryption.unwrap()[0];
2148 assert_eq!(data.method, "http://www.idpf.org/2008/embedding");
2149
2150 let font_file = doc
2151 .archive
2152 .lock()
2153 .unwrap()
2154 .by_name(&data.data)
2155 .unwrap()
2156 .bytes()
2157 .collect::<Result<Vec<u8>, _>>();
2158 assert!(font_file.is_ok());
2159 let font_file = font_file.unwrap();
2160
2161 let mut deobfuscated_with_wrong_key = font_file.clone();
2163 for i in 0..std::cmp::min(1040, deobfuscated_with_wrong_key.len()) {
2164 deobfuscated_with_wrong_key[i] ^= wrong_key[i];
2165 }
2166
2167 assert!(!is_valid_font(&deobfuscated_with_wrong_key));
2168 }
2169
2170 fn is_valid_font(data: &[u8]) -> bool {
2171 if data.len() < 4 {
2172 return false;
2173 }
2174 let sig = &data[0..4];
2175 sig == b"OTTO"
2178 || sig == b"\x00\x01\x00\x00"
2179 || sig == b"\x00\x02\x00\x00"
2180 || sig == b"true"
2181 || sig == b"typ1"
2182 }
2183 }
2184
2185 #[test]
2186 fn test_parse_container() {
2187 let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
2188 let doc = EpubDoc::new(epub_file);
2189 assert!(doc.is_ok());
2190
2191 let container = r#"
2193 <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2194 <rootfiles></rootfiles>
2195 </container>
2196 "#
2197 .to_string();
2198
2199 let result = EpubDoc::<BufReader<File>>::parse_container(container);
2200 assert!(result.is_err());
2201 assert_eq!(
2202 result.unwrap_err(),
2203 EpubError::NonCanonicalFile { tag: "rootfile".to_string() }
2204 );
2205
2206 let container = r#"
2207 <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2208 <rootfiles>
2209 <rootfile media-type="application/oebps-package+xml"/>
2210 </rootfiles>
2211 </container>
2212 "#
2213 .to_string();
2214
2215 let result = EpubDoc::<BufReader<File>>::parse_container(container);
2216 assert!(result.is_err());
2217 assert_eq!(
2218 result.unwrap_err(),
2219 EpubError::MissingRequiredAttribute {
2220 tag: "rootfile".to_string(),
2221 attribute: "full-path".to_string(),
2222 }
2223 );
2224
2225 let container = r#"
2226 <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
2227 <rootfiles>
2228 <rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
2229 </rootfiles>
2230 </container>
2231 "#
2232 .to_string();
2233
2234 let result = EpubDoc::<BufReader<File>>::parse_container(container);
2235 assert!(result.is_ok());
2236 assert_eq!(result.unwrap(), PathBuf::from("EPUB/content.opf"))
2237 }
2238
2239 #[test]
2240 fn test_parse_manifest() {
2241 let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
2242 let doc = EpubDoc::new(epub_file);
2243 assert!(doc.is_ok());
2244
2245 let manifest = r#"
2246 <manifest>
2247 <item href="content_001.xhtml" media-type="application/xhtml+xml"/>
2248 <item properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2249 </manifest>
2250 "#;
2251 let mut doc = doc.unwrap();
2252 let element = XmlReader::parse(manifest);
2253 assert!(element.is_ok());
2254
2255 let element = element.unwrap();
2256 let result = doc.parse_manifest(&element);
2257 assert!(result.is_err());
2258 assert_eq!(
2259 result.unwrap_err(),
2260 EpubError::MissingRequiredAttribute {
2261 tag: "item".to_string(),
2262 attribute: "id".to_string(),
2263 },
2264 );
2265
2266 let manifest = r#"
2267 <manifest>
2268 <item id="content_001" media-type="application/xhtml+xml"/>
2269 <item id="nav" properties="nav" media-type="application/xhtml+xml"/>
2270 </manifest>
2271 "#;
2272 let element = XmlReader::parse(manifest);
2273 assert!(element.is_ok());
2274
2275 let element = element.unwrap();
2276 let result = doc.parse_manifest(&element);
2277 assert!(result.is_err());
2278 assert_eq!(
2279 result.unwrap_err(),
2280 EpubError::MissingRequiredAttribute {
2281 tag: "item".to_string(),
2282 attribute: "href".to_string(),
2283 },
2284 );
2285
2286 let manifest = r#"
2287 <manifest>
2288 <item id="content_001" href="content_001.xhtml"/>
2289 <item id="nav" properties="nav" href="nav.xhtml"/>
2290 </manifest>
2291 "#;
2292 let element = XmlReader::parse(manifest);
2293 assert!(element.is_ok());
2294
2295 let element = element.unwrap();
2296 let result = doc.parse_manifest(&element);
2297 assert!(result.is_err());
2298 assert_eq!(
2299 result.unwrap_err(),
2300 EpubError::MissingRequiredAttribute {
2301 tag: "item".to_string(),
2302 attribute: "media-type".to_string(),
2303 },
2304 );
2305
2306 let manifest = r#"
2307 <manifest>
2308 <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
2309 <item id="nav" properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
2310 </manifest>
2311 "#;
2312 let element = XmlReader::parse(manifest);
2313 assert!(element.is_ok());
2314
2315 let element = element.unwrap();
2316 let result = doc.parse_manifest(&element);
2317 assert!(result.is_ok());
2318 }
2319
2320 #[test]
2322 fn test_fn_has_encryption() {
2323 let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2324 let doc = EpubDoc::new(epub_file);
2325 assert!(doc.is_ok());
2326
2327 let doc = doc.unwrap();
2328 assert!(doc.has_encryption());
2329 }
2330
2331 #[test]
2333 fn test_fn_parse_encryption() {
2334 let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
2335 let doc = EpubDoc::new(epub_file);
2336 assert!(doc.is_ok());
2337
2338 let doc = doc.unwrap();
2339 assert!(doc.encryption.is_some());
2340
2341 let encryption = doc.encryption.unwrap();
2342 assert_eq!(encryption.len(), 1);
2343 assert_eq!(encryption[0].method, "http://www.idpf.org/2008/embedding");
2344 assert_eq!(encryption[0].data, "EPUB/fonts/Lobster.ttf");
2345 }
2346
2347 #[test]
2348 fn test_get_metadata_existing_key() {
2349 let epub_file = Path::new("./test_case/epub-33.epub");
2350 let doc = EpubDoc::new(epub_file);
2351 assert!(doc.is_ok());
2352
2353 let doc = doc.unwrap();
2354
2355 let titles = doc.get_metadata("title");
2356 assert!(titles.is_some());
2357
2358 let titles = titles.unwrap();
2359 assert_eq!(titles.len(), 1);
2360 assert_eq!(titles[0].property, "title");
2361 assert_eq!(titles[0].value, "EPUB 3.3");
2362
2363 let languages = doc.get_metadata("language");
2364 assert!(languages.is_some());
2365
2366 let languages = languages.unwrap();
2367 assert_eq!(languages.len(), 1);
2368 assert_eq!(languages[0].property, "language");
2369 assert_eq!(languages[0].value, "en-us");
2370
2371 let language = doc.get_language();
2372 assert!(language.is_ok());
2373 assert_eq!(language.unwrap(), vec!["en-us"]);
2374 }
2375
2376 #[test]
2377 fn test_get_metadata_nonexistent_key() {
2378 let epub_file = Path::new("./test_case/epub-33.epub");
2379 let doc = EpubDoc::new(epub_file);
2380 assert!(doc.is_ok());
2381
2382 let doc = doc.unwrap();
2383 let metadata = doc.get_metadata("nonexistent");
2384 assert!(metadata.is_none());
2385 }
2386
2387 #[test]
2388 fn test_get_metadata_multiple_items_same_type() {
2389 let epub_file = Path::new("./test_case/epub-33.epub");
2390 let doc = EpubDoc::new(epub_file);
2391 assert!(doc.is_ok());
2392
2393 let doc = doc.unwrap();
2394
2395 let creators = doc.get_metadata("creator");
2396 assert!(creators.is_some());
2397
2398 let creators = creators.unwrap();
2399 assert_eq!(creators.len(), 3);
2400
2401 assert_eq!(creators[0].id, Some("creator_id_0".to_string()));
2402 assert_eq!(creators[0].property, "creator");
2403 assert_eq!(creators[0].value, "Matt Garrish, DAISY Consortium");
2404
2405 assert_eq!(creators[1].id, Some("creator_id_1".to_string()));
2406 assert_eq!(creators[1].property, "creator");
2407 assert_eq!(creators[1].value, "Ivan Herman, W3C");
2408
2409 assert_eq!(creators[2].id, Some("creator_id_2".to_string()));
2410 assert_eq!(creators[2].property, "creator");
2411 assert_eq!(creators[2].value, "Dave Cramer, Invited Expert");
2412 }
2413
2414 #[test]
2415 fn test_get_metadata_with_refinement() {
2416 let epub_file = Path::new("./test_case/epub-33.epub");
2417 let doc = EpubDoc::new(epub_file);
2418 assert!(doc.is_ok());
2419
2420 let doc = doc.unwrap();
2421
2422 let title = doc.get_metadata("title");
2423 assert!(title.is_some());
2424
2425 let title = title.unwrap();
2426 assert_eq!(title.len(), 1);
2427 assert_eq!(title[0].refined.len(), 1);
2428 assert_eq!(title[0].refined[0].property, "title-type");
2429 assert_eq!(title[0].refined[0].value, "main");
2430 }
2431
2432 #[test]
2433 fn test_get_manifest_item_with_fallback() {
2434 let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
2435 let doc = EpubDoc::new(epub_file);
2436 assert!(doc.is_ok());
2437
2438 let doc = doc.unwrap();
2439 assert!(doc.get_manifest_item("content_001").is_ok());
2440 assert!(doc.get_manifest_item("bar").is_ok());
2441
2442 if let Ok((_, mime)) = doc.get_manifest_item_with_fallback("content_001", vec!["image/psd"])
2444 {
2445 assert_eq!(mime, "image/psd");
2446 } else {
2447 assert!(false, "get_manifest_item_with_fallback failed");
2448 }
2449
2450 assert_eq!(
2452 doc.get_manifest_item_with_fallback("content_001", vec!["application/xhtml+xml"])
2453 .unwrap_err()
2454 .to_string(),
2455 "No supported file format: The fallback resource does not contain the file format you support."
2456 );
2457 }
2458
2459 #[test]
2460 fn test_get_cover() {
2461 let epub_file = Path::new("./test_case/pkg-cover-image.epub");
2462 let doc = EpubDoc::new(epub_file);
2463 if let Err(err) = &doc {
2464 println!("{}", err);
2465 }
2466 assert!(doc.is_ok());
2467
2468 let doc = doc.unwrap();
2469 let result = doc.get_cover();
2470 assert!(result.is_some());
2471
2472 let (data, mime) = result.unwrap();
2473 assert_eq!(data.len(), 5785);
2474 assert_eq!(mime, "image/jpeg");
2475 }
2476
2477 #[test]
2478 fn test_epub_2() {
2479 let epub_file = Path::new("./test_case/epub-2.epub");
2480 let doc = EpubDoc::new(epub_file);
2481 assert!(doc.is_ok());
2482
2483 let doc = doc.unwrap();
2484
2485 let titles = doc.get_title();
2486 assert!(titles.is_ok());
2487 assert_eq!(titles.unwrap(), vec!["Minimal EPUB 2.0"]);
2488 }
2489}