1use std::{
24 collections::HashMap,
25 fs::{self, File},
26 io::{BufReader, Read, Seek},
27 path::{Path, PathBuf},
28 sync::{
29 Arc, Mutex,
30 atomic::{AtomicUsize, Ordering},
31 },
32};
33
34use log::warn;
35use zip::{ZipArchive, result::ZipError};
36
37use crate::{
38 error::EpubError,
39 types::{
40 EncryptionData, EpubVersion, ManifestItem, MetadataItem, MetadataLinkItem,
41 MetadataRefinement, NavPoint, SpineItem,
42 },
43 utils::{
44 DecodeBytes, NormalizeWhitespace, XmlElement, XmlReader, adobe_font_dencryption,
45 check_realtive_link_leakage, compression_method_check, get_file_in_zip_archive,
46 idpf_font_dencryption,
47 },
48};
49
/// An EPUB publication opened from a zip archive read through `R`.
///
/// Instances are built by `from_reader` (or `new` for files on disk), which
/// parses the container, package document, encryption descriptor and
/// navigation data up front.
pub struct EpubDoc<R: Read + Seek> {
    /// Shared, mutex-guarded handle to the underlying zip archive.
    pub(crate) archive: Arc<Mutex<ZipArchive<R>>>,

    /// Canonicalized file-system path that was supplied for the EPUB.
    pub(crate) epub_path: PathBuf,

    /// Path of the package document, taken from the `full-path` attribute of
    /// the `rootfile` element in `META-INF/container.xml`.
    pub package_path: PathBuf,

    /// Parent directory of `package_path`; manifest hrefs are joined onto it.
    pub base_path: PathBuf,

    /// EPUB version determined from the package document.
    pub version: EpubVersion,

    /// Value of the `identifier` metadata entry selected through the package's
    /// `unique-identifier` attribute (or the first `identifier` entry when
    /// that attribute is absent).
    pub unique_identifier: String,

    /// Metadata entries collected from the package's `metadata` element.
    pub metadata: Vec<MetadataItem>,

    /// `link` elements collected from the package's `metadata` element.
    pub metadata_link: Vec<MetadataLinkItem>,

    /// Manifest items keyed by their `id` attribute.
    pub manifest: HashMap<String, ManifestItem>,

    /// Spine entries in document order.
    pub spine: Vec<SpineItem>,

    /// Entries read from `META-INF/encryption.xml`; `None` when the file is
    /// absent or lists no `EncryptedData` element.
    pub encryption: Option<Vec<EncryptionData>>,

    /// Table-of-contents tree parsed from the NCX (2.0) or navigation
    /// document (3.0).
    pub catalog: Vec<NavPoint>,

    /// Title of the table of contents, when the source document provides one.
    pub catalog_title: String,

    /// Index into `spine` of the entry most recently navigated to.
    current_spine_index: AtomicUsize,

    /// Whether `META-INF/encryption.xml` exists in the archive.
    has_encryption: bool,
}
129
130impl<R: Read + Seek> EpubDoc<R> {
131 pub fn from_reader(reader: R, epub_path: PathBuf) -> Result<Self, EpubError> {
150 let mut archive = ZipArchive::new(reader).map_err(EpubError::from)?;
160 let epub_path = fs::canonicalize(epub_path)?;
161
162 compression_method_check(&mut archive)?;
163
164 let container =
165 get_file_in_zip_archive(&mut archive, "META-INF/container.xml")?.decode()?;
166 let package_path = Self::parse_container(container)?;
167 let base_path = package_path
168 .parent()
169 .expect("所有文件的父目录不能为空")
170 .to_path_buf();
171
172 let opf_file =
173 get_file_in_zip_archive(&mut archive, package_path.to_str().unwrap())?.decode()?;
174 let package = XmlReader::parse(&opf_file)?;
175
176 let version = Self::determine_epub_version(&package)?;
177 let has_encryption = archive
178 .by_path(Path::new("META-INF/encryption.xml"))
179 .is_ok();
180
181 let mut doc = Self {
182 archive: Arc::new(Mutex::new(archive)),
183 epub_path,
184 package_path,
185 base_path,
186 version,
187 unique_identifier: String::new(),
188 metadata: vec![],
189 metadata_link: vec![],
190 manifest: HashMap::new(),
191 spine: vec![],
192 encryption: None,
193 catalog: vec![],
194 catalog_title: String::new(),
195 current_spine_index: AtomicUsize::new(0),
196 has_encryption,
197 };
198
199 let metadata_element = package.find_elements_by_name("metadata").next().unwrap();
200 let manifest_element = package.find_elements_by_name("manifest").next().unwrap();
201 let spine_element = package.find_elements_by_name("spine").next().unwrap();
202
203 doc.parse_metadata(metadata_element)?;
204 doc.parse_manifest(manifest_element)?;
205 doc.parse_spine(spine_element)?;
206 doc.parse_encryption()?;
207 doc.parse_catalog()?;
208
209 doc.unique_identifier = if let Some(uid) = package.get_attr("unique-identifier") {
211 doc.metadata.iter().find(|item| {
212 item.property == "identifier" && item.id.as_ref().is_some_and(|id| id == &uid)
213 })
214 } else {
215 doc.metadata
216 .iter()
217 .find(|item| item.property == "identifier")
218 }
219 .map(|item| item.value.clone())
220 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "dc:identifier".to_string() })?;
221
222 Ok(doc)
223 }
224
225 fn parse_container(data: String) -> Result<PathBuf, EpubError> {
241 let root = XmlReader::parse(&data)?;
242 let rootfile = root
243 .find_elements_by_name("rootfile")
244 .next()
245 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "rootfile".to_string() })?;
246
247 let attr =
248 rootfile
249 .get_attr("full-path")
250 .ok_or_else(|| EpubError::MissingRequiredAttribute {
251 tag: "rootfile".to_string(),
252 attribute: "full-path".to_string(),
253 })?;
254
255 Ok(PathBuf::from(attr))
256 }
257
258 fn parse_metadata(&mut self, metadata_element: &XmlElement) -> Result<(), EpubError> {
269 const DC_NAMESPACE: &str = "http://purl.org/dc/elements/1.1/";
270 const OPF_NAMESPACE: &str = "http://www.idpf.org/2007/opf";
271
272 let mut metadata = Vec::new();
273 let mut metadata_link = Vec::new();
274 let mut refinements = HashMap::<String, Vec<MetadataRefinement>>::new();
275
276 for element in metadata_element.children() {
277 match &element.namespace {
278 Some(namespace) if namespace == DC_NAMESPACE => {
279 self.parse_dc_metadata(element, &mut metadata)?
280 }
281
282 Some(namespace) if namespace == OPF_NAMESPACE => self.parse_opf_metadata(
283 element,
284 &mut metadata,
285 &mut metadata_link,
286 &mut refinements,
287 )?,
288
289 _ => {}
290 };
291 }
292
293 for item in metadata.iter_mut() {
294 if let Some(id) = &item.id {
295 if let Some(refinements) = refinements.remove(id) {
296 item.refined = refinements;
297 }
298 }
299 }
300
301 self.metadata = metadata;
302 self.metadata_link = metadata_link;
303 Ok(())
304 }
305
306 fn parse_manifest(&mut self, manifest_element: &XmlElement) -> Result<(), EpubError> {
316 let estimated_items = manifest_element.children().count();
317 let mut resources = HashMap::with_capacity(estimated_items);
318
319 for element in manifest_element.children() {
320 let id = element
321 .get_attr("id")
322 .ok_or_else(|| EpubError::MissingRequiredAttribute {
323 tag: element.tag_name(),
324 attribute: "id".to_string(),
325 })?
326 .to_string();
327 let path = element
328 .get_attr("href")
329 .ok_or_else(|| EpubError::MissingRequiredAttribute {
330 tag: element.tag_name(),
331 attribute: "href".to_string(),
332 })?
333 .to_string();
334 let mime = element
335 .get_attr("media-type")
336 .ok_or_else(|| EpubError::MissingRequiredAttribute {
337 tag: element.tag_name(),
338 attribute: "media-type".to_string(),
339 })?
340 .to_string();
341 let properties = element.get_attr("properties");
342 let fallback = element.get_attr("fallback");
343
344 resources.insert(
345 id.clone(),
346 ManifestItem {
347 id,
348 path: self.normalize_manifest_path(&path)?,
349 mime,
350 properties,
351 fallback,
352 },
353 );
354 }
355
356 self.manifest = resources;
357 self.validate_fallback_chains();
358 Ok(())
359 }
360
361 fn parse_spine(&mut self, spine_element: &XmlElement) -> Result<(), EpubError> {
371 let mut spine = Vec::new();
372 for element in spine_element.children() {
373 let idref = element
374 .get_attr("idref")
375 .ok_or_else(|| EpubError::MissingRequiredAttribute {
376 tag: element.tag_name(),
377 attribute: "idref".to_string(),
378 })?
379 .to_string();
380 let id = element.get_attr("id");
381 let linear = element
382 .get_attr("linear")
383 .map(|linear| linear == "yes")
384 .unwrap_or(true);
385 let properties = element.get_attr("properties");
386
387 spine.push(SpineItem { idref, id, linear, properties });
388 }
389
390 self.spine = spine;
391 Ok(())
392 }
393
394 fn parse_encryption(&mut self) -> Result<(), EpubError> {
404 if !self.has_encryption() {
405 return Ok(());
406 }
407
408 let mut archive = self.archive.lock()?;
409 let encryption_file =
410 get_file_in_zip_archive(&mut archive, "META-INF/encryption.xml")?.decode()?;
411
412 let root = XmlReader::parse(&encryption_file)?;
413
414 let mut encryption_data = Vec::new();
415 for data in root.children() {
416 if data.name != "EncryptedData" {
417 continue;
418 }
419
420 let method = data
421 .find_elements_by_name("EncryptionMethod")
422 .next()
423 .ok_or_else(|| EpubError::NonCanonicalFile {
424 tag: "EncryptionMethod".to_string(),
425 })?;
426 let reference = data
427 .find_elements_by_name("CipherReference")
428 .next()
429 .ok_or_else(|| EpubError::NonCanonicalFile {
430 tag: "CipherReference".to_string(),
431 })?;
432
433 encryption_data.push(EncryptionData {
434 method: method
435 .get_attr("Algorithm")
436 .ok_or_else(|| EpubError::MissingRequiredAttribute {
437 tag: "EncryptionMethod".to_string(),
438 attribute: "Algorithm".to_string(),
439 })?
440 .to_string(),
441 data: reference
442 .get_attr("URI")
443 .ok_or_else(|| EpubError::MissingRequiredAttribute {
444 tag: "CipherReference".to_string(),
445 attribute: "URI".to_string(),
446 })?
447 .to_string(),
448 });
449 }
450
451 if !encryption_data.is_empty() {
452 self.encryption = Some(encryption_data);
453 }
454
455 Ok(())
456 }
457
458 fn parse_catalog(&mut self) -> Result<(), EpubError> {
465 const HEAD_TAGS: [&str; 6] = ["h1", "h2", "h3", "h4", "h5", "h6"];
466
467 let mut archive = self.archive.lock()?;
468 match self.version {
469 EpubVersion::Version2_0 => {
470 let opf_file =
471 get_file_in_zip_archive(&mut archive, self.package_path.to_str().unwrap())?
472 .decode()?;
473 let opf_element = XmlReader::parse(&opf_file)?;
474
475 let toc_id = opf_element
476 .find_children_by_name("spine")
477 .next()
478 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?
479 .get_attr("toc")
480 .ok_or_else(|| EpubError::MissingRequiredAttribute {
481 tag: "spine".to_string(),
482 attribute: "toc".to_string(),
483 })?
484 .to_owned();
485 let toc_path = self
486 .manifest
487 .get(&toc_id)
488 .ok_or(EpubError::ResourceIdNotExist { id: toc_id })?
489 .path
490 .to_str()
491 .unwrap();
492
493 let ncx_file = get_file_in_zip_archive(&mut archive, toc_path)?.decode()?;
494 let ncx = XmlReader::parse(&ncx_file)?;
495
496 match ncx.find_elements_by_name("docTitle").next() {
497 Some(element) => self.catalog_title = element.text(),
498 None => warn!(
499 "Expecting to get docTitle information from the ncx file, but it's missing."
500 ),
501 };
502
503 let nav_map = ncx
504 .find_elements_by_name("navMap")
505 .next()
506 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "navMap".to_string() })?;
507
508 self.catalog = self.parse_nav_points(nav_map)?;
509
510 Ok(())
511 }
512
513 EpubVersion::Version3_0 => {
514 let nav_path = self
515 .manifest
516 .values()
517 .find(|item| {
518 if let Some(property) = &item.properties {
519 return property.contains("nav");
520 }
521 false
522 })
523 .map(|item| item.path.clone())
524 .ok_or_else(|| EpubError::NonCanonicalEpub {
525 expected_file: "Navigation Document".to_string(),
526 })?;
527
528 let nav_file =
529 get_file_in_zip_archive(&mut archive, nav_path.to_str().unwrap())?.decode()?;
530
531 let nav_element = XmlReader::parse(&nav_file)?;
532 let nav = nav_element
533 .find_elements_by_name("nav")
534 .find(|&element| element.get_attr("epub:type") == Some(String::from("toc")))
535 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "nav".to_string() })?;
536 let nav_title = nav.find_children_by_names(&HEAD_TAGS).next();
537 let nav_list = nav
538 .find_children_by_name("ol")
539 .next()
540 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "ol".to_string() })?;
541
542 self.catalog = self.parse_catalog_list(nav_list)?;
543 if let Some(nav_title) = nav_title {
544 self.catalog_title = nav_title.text();
545 };
546 Ok(())
547 }
548 }
549 }
550
    /// Returns whether `META-INF/encryption.xml` was found in the archive
    /// when the document was opened.
    pub fn has_encryption(&self) -> bool {
        self.has_encryption
    }
569
570 pub fn get_metadata(&self, key: &str) -> Option<Vec<MetadataItem>> {
584 let metadatas = self
585 .metadata
586 .iter()
587 .filter(|item| item.property == key)
588 .cloned()
589 .collect::<Vec<MetadataItem>>();
590
591 (!metadatas.is_empty()).then_some(metadatas)
592 }
593
594 pub fn get_metadata_value(&self, key: &str) -> Option<Vec<String>> {
606 let values = self
607 .metadata
608 .iter()
609 .filter(|item| item.property == key)
610 .map(|item| item.value.clone())
611 .collect::<Vec<String>>();
612
613 (!values.is_empty()).then_some(values)
614 }
615
616 pub fn get_title(&self) -> Result<Vec<String>, EpubError> {
629 self.get_metadata_value("title")
630 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "title".to_string() })
631 }
632
633 pub fn get_language(&self) -> Result<Vec<String>, EpubError> {
647 self.get_metadata_value("language")
648 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "language".to_string() })
649 }
650
651 pub fn get_identifier(&self) -> Result<Vec<String>, EpubError> {
667 self.get_metadata_value("identifier")
668 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "identifier".to_string() })
669 }
670
671 pub fn get_manifest_item(&self, id: &str) -> Result<(Vec<u8>, String), EpubError> {
688 let resource_item = self
689 .manifest
690 .get(id)
691 .cloned()
692 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
693
694 let path = resource_item.path.to_str().unwrap();
695
696 let mut archive = self.archive.lock()?;
697 let mut data = match archive.by_name(path) {
698 Ok(mut file) => {
699 let mut entry = Vec::<u8>::new();
700 file.read_to_end(&mut entry)?;
701
702 Ok(entry)
703 }
704 Err(ZipError::FileNotFound) => {
705 Err(EpubError::ResourceNotFound { resource: path.to_string() })
706 }
707 Err(err) => Err(EpubError::from(err)),
708 }?;
709
710 if let Some(method) = self.is_encryption_file(path) {
711 data = self.auto_dencrypt(&method, &mut data)?;
712 }
713
714 Ok((data, resource_item.mime))
715 }
716
717 pub fn get_manifest_item_by_path(&self, path: &str) -> Result<(Vec<u8>, String), EpubError> {
736 let id = self
737 .manifest
738 .iter()
739 .find(|(_, item)| item.path.to_str().unwrap() == path)
740 .map(|(id, _)| id.to_string())
741 .ok_or_else(|| EpubError::ResourceNotFound { resource: path.to_string() })?;
742
743 self.get_manifest_item(&id)
744 }
745
746 pub fn get_manifest_item_with_fallback(
762 &self,
763 id: &str,
764 supported_format: Vec<&str>,
765 ) -> Result<(Vec<u8>, String), EpubError> {
766 let mut manifest_item = self
767 .manifest
768 .get(id)
769 .cloned()
770 .ok_or_else(|| EpubError::ResourceIdNotExist { id: id.to_string() })?;
771
772 let mut current_manifest_id = id.to_string();
773 let mut fallback_chain = Vec::<String>::new();
774 'fallback: loop {
775 if supported_format.contains(&manifest_item.mime.as_str()) {
776 return self.get_manifest_item(¤t_manifest_id);
777 }
778
779 let fallback_id = manifest_item.fallback.clone();
780
781 match fallback_id {
782 None => break 'fallback,
784
785 Some(id) if fallback_chain.contains(&id) => break 'fallback,
787
788 Some(id) => {
789 fallback_chain.push(id.clone());
790
791 manifest_item = self
795 .manifest
796 .get(&manifest_item.fallback.unwrap())
797 .cloned()
798 .ok_or(EpubError::ResourceIdNotExist { id: id.clone() })?;
799 current_manifest_id = id;
800 }
801 };
802 }
803
804 Err(EpubError::NoSupportedFileFormat)
805 }
806
807 pub fn get_cover(&self) -> Option<(Vec<u8>, String)> {
824 self.manifest
825 .values()
826 .filter_map(|manifest| {
827 if manifest.id.to_ascii_lowercase().contains("cover") {
828 return Some(manifest.id.clone());
829 }
830
831 if let Some(properties) = &manifest.properties {
832 if properties.to_ascii_lowercase().contains("cover") {
833 return Some(manifest.id.clone());
834 }
835 }
836
837 None
838 })
839 .collect::<Vec<String>>()
840 .iter()
841 .find_map(|id| self.get_manifest_item(id).ok())
842 }
843
844 pub fn navigate_by_spine_index(&mut self, index: usize) -> Option<(Vec<u8>, String)> {
863 if index >= self.spine.len() {
864 return None;
865 }
866
867 let manifest_id = self.spine[index].idref.clone();
868 self.current_spine_index.store(index, Ordering::SeqCst);
869 self.get_manifest_item(&manifest_id).ok()
870 }
871
872 pub fn spine_prev(&self) -> Option<(Vec<u8>, String)> {
884 let current_index = self.current_spine_index.load(Ordering::SeqCst);
885 if current_index == 0 || !self.spine[current_index].linear {
886 return None;
887 }
888
889 let prev_index = (0..current_index)
890 .rev()
891 .find(|&index| self.spine[index].linear)?;
892
893 self.current_spine_index.store(prev_index, Ordering::SeqCst);
894 let manifest_id = self.spine[prev_index].idref.clone();
895 self.get_manifest_item(&manifest_id).ok()
896 }
897
898 pub fn spine_next(&mut self) -> Option<(Vec<u8>, String)> {
910 let current_index = self.current_spine_index.load(Ordering::SeqCst);
911 if current_index >= self.spine.len() - 1 || !self.spine[current_index].linear {
912 return None;
913 }
914
915 let next_index =
916 (current_index + 1..self.spine.len()).find(|&index| self.spine[index].linear)?;
917
918 self.current_spine_index.store(next_index, Ordering::SeqCst);
919 let manifest_id = self.spine[next_index].idref.clone();
920 self.get_manifest_item(&manifest_id).ok()
921 }
922
923 pub fn spine_current(&self) -> Option<(Vec<u8>, String)> {
933 let manifest_id = self.spine[self.current_spine_index.load(Ordering::SeqCst)]
934 .idref
935 .clone();
936 self.get_manifest_item(&manifest_id).ok()
937 }
938
939 fn determine_epub_version(opf_element: &XmlElement) -> Result<EpubVersion, EpubError> {
949 if let Some(version) = opf_element.get_attr("version") {
951 match version.as_str() {
952 "2.0" => return Ok(EpubVersion::Version2_0),
953 "3.0" => return Ok(EpubVersion::Version3_0),
954 _ => {}
955 }
956 }
957
958 let spine_element = opf_element
959 .find_elements_by_name("spine")
960 .next()
961 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "spine".to_string() })?;
962
963 if spine_element.get_attr("toc").is_some() {
965 return Ok(EpubVersion::Version2_0);
966 }
967
968 let manifest_element = opf_element
969 .find_elements_by_name("manifest")
970 .next()
971 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "manifest".to_string() })?;
972
973 manifest_element
975 .children()
976 .find_map(|element| {
977 if let Some(id) = element.get_attr("id") {
978 if id.eq("nav") {
979 return Some(EpubVersion::Version3_0);
980 }
981 }
982
983 None
984 })
985 .ok_or(EpubError::UnrecognizedEpubVersion)
986 }
987
988 #[inline]
998 fn parse_dc_metadata(
999 &self,
1000 element: &XmlElement,
1001 metadata: &mut Vec<MetadataItem>,
1002 ) -> Result<(), EpubError> {
1004 let id = element.get_attr("id");
1005 let lang = element.get_attr("lang");
1006 let property = element.name.clone();
1007 let value = element.text().normalize_whitespace();
1008
1009 let refined = match self.version {
1010 EpubVersion::Version2_0 => element
1013 .attributes
1014 .iter()
1015 .map(|(name, value)| {
1016 let property = name.to_string();
1017 let value = value.to_string().normalize_whitespace();
1018
1019 MetadataRefinement {
1020 refines: id.clone().unwrap(),
1021 property,
1022 value,
1023 lang: None,
1024 scheme: None,
1025 }
1026 })
1027 .collect(),
1028 EpubVersion::Version3_0 => vec![],
1029 };
1030
1031 metadata.push(MetadataItem { id, property, value, lang, refined });
1032
1033 Ok(())
1034 }
1035
1036 #[inline]
1047 fn parse_opf_metadata(
1048 &self,
1049 element: &XmlElement,
1050 metadata: &mut Vec<MetadataItem>,
1051 metadata_link: &mut Vec<MetadataLinkItem>,
1052 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1053 ) -> Result<(), EpubError> {
1054 match element.name.as_str() {
1055 "meta" => self.parse_meta_element(element, metadata, refinements),
1056 "link" => self.parse_link_element(element, metadata_link),
1057 _ => Ok(()),
1058 }
1059 }
1060
1061 #[inline]
1062 fn parse_meta_element(
1063 &self,
1064 element: &XmlElement,
1065 metadata: &mut Vec<MetadataItem>,
1066 refinements: &mut HashMap<String, Vec<MetadataRefinement>>,
1067 ) -> Result<(), EpubError> {
1068 match self.version {
1069 EpubVersion::Version2_0 => {
1070 let property = element
1071 .get_attr("name")
1072 .ok_or_else(|| EpubError::NonCanonicalFile { tag: element.tag_name() })?;
1073 let value = element
1074 .get_attr("content")
1075 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1076 tag: element.tag_name(),
1077 attribute: "content".to_string(),
1078 })?
1079 .normalize_whitespace();
1080
1081 metadata.push(MetadataItem {
1082 id: None,
1083 property,
1084 value,
1085 lang: None,
1086 refined: vec![],
1087 });
1088 }
1089
1090 EpubVersion::Version3_0 => {
1091 let property = element.get_attr("property").ok_or_else(|| {
1092 EpubError::MissingRequiredAttribute {
1093 tag: element.tag_name(),
1094 attribute: "property".to_string(),
1095 }
1096 })?;
1097 let value = element.text().normalize_whitespace();
1098 let lang = element.get_attr("lang");
1099
1100 if let Some(refines) = element.get_attr("refines") {
1101 let id = refines.strip_prefix("#").unwrap_or(&refines).to_string();
1102 let scheme = element.get_attr("scheme");
1103 let refinement = MetadataRefinement {
1104 refines: id.clone(),
1105 property,
1106 value,
1107 lang,
1108 scheme,
1109 };
1110
1111 if let Some(refinements) = refinements.get_mut(&id) {
1112 refinements.push(refinement);
1113 } else {
1114 refinements.insert(id, vec![refinement]);
1115 }
1116 } else {
1117 let id = element.get_attr("id");
1118 let item = MetadataItem {
1119 id,
1120 property,
1121 value,
1122 lang,
1123 refined: vec![],
1124 };
1125
1126 metadata.push(item);
1127 };
1128 }
1129 }
1130 Ok(())
1131 }
1132
1133 #[inline]
1134 fn parse_link_element(
1135 &self,
1136 element: &XmlElement,
1137 metadata_link: &mut Vec<MetadataLinkItem>,
1138 ) -> Result<(), EpubError> {
1139 let href = element
1140 .get_attr("href")
1141 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1142 tag: element.tag_name(),
1143 attribute: "href".to_string(),
1144 })?;
1145 let rel = element
1146 .get_attr("rel")
1147 .ok_or_else(|| EpubError::MissingRequiredAttribute {
1148 tag: element.tag_name(),
1149 attribute: "rel".to_string(),
1150 })?;
1151 let hreflang = element.get_attr("hreflang");
1152 let id = element.get_attr("id");
1153 let mime = element.get_attr("media-type");
1154 let properties = element.get_attr("properties");
1155
1156 metadata_link.push(MetadataLinkItem {
1157 href,
1158 rel,
1159 hreflang,
1160 id,
1161 mime,
1162 properties,
1163 refines: None,
1164 });
1165 Ok(())
1166 }
1167
1168 fn parse_nav_points(&self, parent_element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1174 let mut nav_points = Vec::new();
1175 for nav_point in parent_element.find_children_by_name("navPoint") {
1176 let label = match nav_point.find_children_by_name("navLabel").next() {
1177 Some(element) => element.text(),
1178 None => String::new(),
1179 };
1180
1181 let content = nav_point
1182 .find_children_by_name("content")
1183 .next()
1184 .map(|element| PathBuf::from(element.text()));
1185
1186 let play_order = nav_point
1187 .get_attr("playOrder")
1188 .and_then(|order| order.parse::<usize>().ok());
1189
1190 let children = self.parse_nav_points(nav_point)?;
1191
1192 nav_points.push(NavPoint { label, content, play_order, children });
1193 }
1194
1195 nav_points.sort();
1196 Ok(nav_points)
1197 }
1198
1199 fn parse_catalog_list(&self, element: &XmlElement) -> Result<Vec<NavPoint>, EpubError> {
1205 let mut catalog = Vec::new();
1206 for item in element.children() {
1207 if item.tag_name() != "li" {
1208 return Err(EpubError::NonCanonicalFile { tag: "li".to_string() });
1209 }
1210
1211 let title_element = item
1212 .find_children_by_names(&["span", "a"])
1213 .next()
1214 .ok_or_else(|| EpubError::NonCanonicalFile { tag: "span/a".to_string() })?;
1215 let content_href = title_element.get_attr("href").map(PathBuf::from);
1216 let sub_list = if let Some(list) = item.find_children_by_name("ol").next() {
1217 self.parse_catalog_list(list)?
1218 } else {
1219 vec![]
1220 };
1221
1222 catalog.push(NavPoint {
1223 label: title_element.text(),
1224 content: content_href,
1225 children: sub_list,
1226 play_order: None,
1227 });
1228 }
1229
1230 Ok(catalog)
1231 }
1232
1233 #[inline]
1250 fn normalize_manifest_path(&self, path: &str) -> Result<PathBuf, EpubError> {
1251 let mut path = if path.starts_with("../") {
1252 let mut current_dir = self.epub_path.join(&self.package_path);
1253 current_dir.pop();
1254
1255 check_realtive_link_leakage(self.epub_path.clone(), current_dir, path)
1256 .map(PathBuf::from)
1257 .ok_or_else(|| EpubError::RealtiveLinkLeakage { path: path.to_string() })?
1258 } else if let Some(path) = path.strip_prefix("/") {
1259 PathBuf::from(path.to_string())
1260 } else {
1261 self.base_path.join(path)
1262 };
1263
1264 #[cfg(windows)]
1265 {
1266 path = PathBuf::from(path.to_string_lossy().replace('\\', "/"));
1267 }
1268
1269 Ok(path)
1270 }
1271
1272 fn validate_fallback_chains(&self) {
1283 for (id, item) in &self.manifest {
1284 if item.fallback.is_none() {
1285 continue;
1286 }
1287
1288 let mut fallback_chain = Vec::new();
1289 if let Err(msg) = self.validate_fallback_chain(id, &mut fallback_chain) {
1290 warn!("Invalid fallback chain for item {}: {}", id, msg);
1291 }
1292 }
1293 }
1294
1295 fn validate_fallback_chain(
1309 &self,
1310 manifest_id: &str,
1311 fallback_chain: &mut Vec<String>,
1312 ) -> Result<(), String> {
1313 if fallback_chain.contains(&manifest_id.to_string()) {
1314 fallback_chain.push(manifest_id.to_string());
1315
1316 return Err(format!(
1317 "Circular reference detected in fallback chain for {}",
1318 fallback_chain.join("->")
1319 ));
1320 }
1321
1322 let item = self.manifest.get(manifest_id).unwrap();
1324
1325 if let Some(fallback_id) = &item.fallback {
1326 if !self.manifest.contains_key(fallback_id) {
1327 return Err(format!(
1328 "Fallback resource {} does not exist in manifest",
1329 fallback_id
1330 ));
1331 }
1332
1333 fallback_chain.push(manifest_id.to_string());
1334 self.validate_fallback_chain(fallback_id, fallback_chain)
1335 } else {
1336 Ok(())
1338 }
1339 }
1340
1341 fn is_encryption_file(&self, path: &str) -> Option<String> {
1354 self.encryption.as_ref().and_then(|encryptions| {
1355 encryptions
1356 .iter()
1357 .find(|encryption| encryption.data == path)
1358 .map(|encryption| encryption.method.clone())
1359 })
1360 }
1361
1362 #[inline]
1380 fn auto_dencrypt(&self, method: &str, data: &mut [u8]) -> Result<Vec<u8>, EpubError> {
1381 match method {
1382 "http://www.idpf.org/2008/embedding" => {
1383 Ok(idpf_font_dencryption(data, &self.unique_identifier))
1384 }
1385 "http://ns.adobe.com/pdf/enc#RC" => {
1386 Ok(adobe_font_dencryption(data, &self.unique_identifier))
1387 }
1388 _ => Err(EpubError::UnsupportedEncryptedMethod { method: method.to_string() }),
1389 }
1390 }
1391}
1392
1393impl EpubDoc<BufReader<File>> {
1394 pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
1406 let file = File::open(&path).map_err(EpubError::from)?;
1407 let path = fs::canonicalize(path)?;
1408
1409 Self::from_reader(BufReader::new(file), path)
1410 }
1411
1412 pub fn is_valid_epub<P: AsRef<Path>>(path: P) -> Result<bool, EpubError> {
1427 let result = EpubDoc::new(path);
1428
1429 match result {
1430 Ok(_) => Ok(true),
1431 Err(err) if Self::is_outside_error(&err) => Err(err),
1432 Err(_) => Ok(false),
1433 }
1434 }
1435
1436 fn is_outside_error(err: &EpubError) -> bool {
1455 matches!(
1456 err,
1457 EpubError::ArchiveError { .. }
1458 | EpubError::IOError { .. }
1459 | EpubError::MutexError { .. }
1460 | EpubError::Utf8DecodeError { .. }
1461 | EpubError::Utf16DecodeError { .. }
1462 | EpubError::QuickXmlError { .. }
1463 )
1464 }
1465}
1466
1467#[cfg(test)]
1468mod tests {
1469 use std::{
1470 fs::File,
1471 io::BufReader,
1472 path::{Path, PathBuf},
1473 };
1474
1475 use crate::{epub::EpubDoc, error::EpubError, utils::XmlReader};
1476
1477 mod package_documents_tests {
1479 use std::{path::Path, sync::atomic::Ordering};
1480
1481 use crate::epub::{EpubDoc, EpubVersion};
1482
1483 #[test]
1487 fn test_pkg_collections_unknown() {
1488 let epub_file = Path::new("./test_case/pkg-collections-unknown.epub");
1489 let doc = EpubDoc::new(epub_file);
1490 assert!(doc.is_ok());
1491 }
1492
1493 #[test]
1497 fn test_pkg_creator_order() {
1498 let epub_file = Path::new("./test_case/pkg-creator-order.epub");
1499 let doc = EpubDoc::new(epub_file);
1500 assert!(doc.is_ok());
1501
1502 let doc = doc.unwrap();
1503 let creators = doc.get_metadata_value("creator");
1504 assert!(creators.is_some());
1505
1506 let creators = creators.unwrap();
1507 assert_eq!(creators.len(), 5);
1508 assert_eq!(
1509 creators,
1510 vec![
1511 "Dave Cramer",
1512 "Wendy Reid",
1513 "Dan Lazin",
1514 "Ivan Herman",
1515 "Brady Duga",
1516 ]
1517 );
1518 }
1519
1520 #[test]
1524 fn test_pkg_manifest_order() {
1525 let epub_file = Path::new("./test_case/pkg-manifest-unknown.epub");
1526 let doc = EpubDoc::new(epub_file);
1527 assert!(doc.is_ok());
1528
1529 let doc = doc.unwrap();
1530 assert_eq!(doc.manifest.len(), 2);
1531 assert!(doc.get_manifest_item("nav").is_ok());
1532 assert!(doc.get_manifest_item("content_001").is_ok());
1533 assert!(doc.get_manifest_item("content_002").is_err());
1534 }
1535
1536 #[test]
1540 fn test_pkg_meta_unknown() {
1541 let epub_file = Path::new("./test_case/pkg-meta-unknown.epub");
1542 let doc = EpubDoc::new(epub_file);
1543 assert!(doc.is_ok());
1544
1545 let doc = doc.unwrap();
1546 let value = doc.get_metadata_value("dcterms:isReferencedBy");
1547 assert!(value.is_some());
1548 let value = value.unwrap();
1549 assert_eq!(value.len(), 1);
1550 assert_eq!(
1551 value,
1552 vec!["https://www.w3.org/TR/epub-rs/#confreq-rs-pkg-meta-unknown"]
1553 );
1554
1555 let value = doc.get_metadata_value("dcterms:modified");
1556 assert!(value.is_some());
1557 let value = value.unwrap();
1558 assert_eq!(value.len(), 1);
1559 assert_eq!(value, vec!["2021-01-11T00:00:00Z"]);
1560
1561 let value = doc.get_metadata_value("dcterms:title");
1562 assert!(value.is_none());
1563 }
1564
1565 #[test]
1569 fn test_pkg_meta_white_space() {
1570 let epub_file = Path::new("./test_case/pkg-meta-whitespace.epub");
1571 let doc = EpubDoc::new(epub_file);
1572 assert!(doc.is_ok());
1573
1574 let doc = doc.unwrap();
1575 let value = doc.get_metadata_value("creator");
1576 assert!(value.is_some());
1577 let value = value.unwrap();
1578 assert_eq!(value.len(), 1);
1579 assert_eq!(value, vec!["Dave Cramer"]);
1580
1581 let value = doc.get_metadata_value("description");
1582 assert!(value.is_some());
1583 let value = value.unwrap();
1584 assert_eq!(value.len(), 1);
1585 assert_eq!(
1586 value,
1587 vec![
1588 "The package document's title and creator contain leading and trailing spaces along with excess internal whitespace. The reading system must render only a single space in all cases."
1589 ]
1590 );
1591 }
1592
1593 #[test]
1597 fn test_pkg_spine_duplicate_item_hyperlink() {
1598 let epub_file = Path::new("./test_case/pkg-spine-duplicate-item-hyperlink.epub");
1599 let doc = EpubDoc::new(epub_file);
1600 assert!(doc.is_ok());
1601
1602 let mut doc = doc.unwrap();
1603 assert_eq!(doc.spine.len(), 4);
1604 assert_eq!(
1605 doc.navigate_by_spine_index(0).unwrap(),
1606 doc.get_manifest_item("content_001").unwrap()
1607 );
1608 assert_eq!(
1609 doc.navigate_by_spine_index(1).unwrap(),
1610 doc.get_manifest_item("content_002").unwrap()
1611 );
1612 assert_eq!(
1613 doc.navigate_by_spine_index(2).unwrap(),
1614 doc.get_manifest_item("content_002").unwrap()
1615 );
1616 assert_eq!(
1617 doc.navigate_by_spine_index(3).unwrap(),
1618 doc.get_manifest_item("content_002").unwrap()
1619 );
1620 }
1621
1622 #[test]
1626 fn test_pkg_spine_duplicate_item_rendering() {
1627 let epub_file = Path::new("./test_case/pkg-spine-duplicate-item-rendering.epub");
1628 let doc = EpubDoc::new(epub_file);
1629 assert!(doc.is_ok());
1630
1631 let mut doc = doc.unwrap();
1632 assert_eq!(doc.spine.len(), 4);
1633
1634 let result = doc.spine_prev();
1635 assert!(result.is_none());
1636
1637 let result = doc.spine_next();
1638 assert!(result.is_some());
1639
1640 doc.spine_next();
1641 doc.spine_next();
1642 let result = doc.spine_next();
1643 assert!(result.is_none());
1644 }
1645
1646 #[test]
1650 fn test_pkg_spine_nonlinear_activation() {
1651 let epub_file = Path::new("./test_case/pkg-spine-nonlinear-activation.epub");
1652 let doc = EpubDoc::new(epub_file);
1653 assert!(doc.is_ok());
1654
1655 let mut doc = doc.unwrap();
1656 assert!(doc.spine_prev().is_none());
1657 assert!(doc.spine_next().is_none());
1658
1659 assert!(doc.navigate_by_spine_index(1).is_some());
1660 assert!(doc.spine_prev().is_none());
1661 assert!(doc.spine_next().is_none());
1662 }
1663
#[test]
/// Opens `pkg-spine-order.epub` and checks that the four spine `idref`s appear in the
/// expected order.
fn test_pkg_spine_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-spine-order.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.spine.len(), 4);

    let expected_order = [
        "d-content_001",
        "c-content_002",
        "b-content_003",
        "a-content_004",
    ];
    let actual_order: Vec<&str> = book
        .spine
        .iter()
        .map(|entry| entry.idref.as_str())
        .collect();
    assert_eq!(actual_order, expected_order);
}
1688
#[test]
/// Walks the spine of `pkg-spine-order-svg.epub` with `spine_next` and checks, at every
/// step, that the returned value equals the manifest item looked up through the `idref`
/// of the spine entry at the current spine index. After the walk the current spine index
/// must be 3.
fn test_spine_order_svg() {
    let epub_file = Path::new("./test_case/pkg-spine-order-svg.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    let mut doc = doc.unwrap();
    assert_eq!(doc.spine.len(), 4);

    // `while let` ends the walk as soon as `spine_next` yields `None`.
    while let Some(spine) = doc.spine_next() {
        let index = doc.current_spine_index.load(Ordering::Relaxed);

        // Borrow the idref in place; the lookup does not need an owned copy.
        let resource = doc.get_manifest_item(&doc.spine[index].idref);
        assert!(resource.is_ok());

        assert_eq!(spine, resource.unwrap());
    }

    assert_eq!(doc.current_spine_index.load(Ordering::Relaxed), 3);
}
1718
#[test]
/// Opens `pkg-spine-unknown.epub` and checks the single spine entry: its `idref` is
/// `content_001`, it has no `id`, it is linear, and its `properties` value is
/// `untrustworthy`.
fn test_pkg_spine_unknown() {
    let epub_file = Path::new("./test_case/pkg-spine-unknown.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    let doc = doc.unwrap();
    assert_eq!(doc.spine.len(), 1);

    let item = &doc.spine[0];
    assert_eq!(item.idref, "content_001");
    assert_eq!(item.id, None);
    // Assert the boolean directly instead of comparing it with `true`.
    assert!(item.linear);
    // Compare through a borrowed `&str` so no `String` is allocated for the expectation.
    assert_eq!(item.properties.as_deref(), Some("untrustworthy"));
}
1735
#[test]
/// Opens `pkg-title-order.epub` and checks that `get_title` returns the six titles in the
/// expected order.
fn test_pkg_title_order() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-title-order.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let titles = book.get_title();
    assert!(titles.is_ok());
    let titles = titles.unwrap();

    let expected_titles = vec![
        "pkg-title-order",
        "This title must not display first",
        "Also, this title must not display first",
        "This title also must not display first",
        "This title must also not display first",
        "This title must not display first, also",
    ];
    assert_eq!(titles.len(), 6);
    assert_eq!(titles, expected_titles);
}
1763
#[test]
/// Opens `pkg-unique-id.epub` and `pkg-unique-id_duplicate.epub` and checks that both
/// report the same identifier and the same `unique_identifier` value, `pkg-unique-id`.
fn test_pkg_unique_id() {
    let first = EpubDoc::new(Path::new("./test_case/pkg-unique-id.epub"));
    assert!(first.is_ok());

    let second = EpubDoc::new(Path::new("./test_case/pkg-unique-id_duplicate.epub"));
    assert!(second.is_ok());

    let (first, second) = (first.unwrap(), second.unwrap());

    let first_identifier = first.get_identifier().unwrap();
    let second_identifier = second.get_identifier().unwrap();
    assert_eq!(first_identifier, second_identifier);

    for book in [&first, &second] {
        assert_eq!(book.unique_identifier, "pkg-unique-id");
    }
}
1787
#[test]
/// Opens `pkg-version-backward.epub` and checks that the detected version is
/// `EpubVersion::Version3_0`.
fn test_pkg_version_backward() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-version-backward.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let expected_version = EpubVersion::Version3_0;
    assert_eq!(book.version, expected_version);
}
1800
#[test]
/// Opens `pkg-linked-records.epub` and checks that three metadata links were collected
/// and that at least one of them has `properties` equal to `onix`.
fn test_pkg_linked_records() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-linked-records.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert_eq!(book.metadata_link.len(), 3);

    // A link without `properties` never matches.
    let has_onix_link = book
        .metadata_link
        .iter()
        .any(|link| matches!(&link.properties, Some(value) if value.eq("onix")));
    assert!(has_onix_link);
}
1822
#[test]
/// Opens `pkg-manifest-unlisted-resource.epub` and checks that a path lookup succeeds for
/// `EPUB/content_001.xhtml` but fails for `EPUB/red.png` with the expected
/// "Resource not found" message.
fn test_pkg_manifest_unlisted_resource() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pkg-manifest-unlisted-resource.epub",
    ));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let listed = book.get_manifest_item_by_path("EPUB/content_001.xhtml");
    assert!(listed.is_ok());

    let unlisted_path = "EPUB/red.png";
    assert!(book.get_manifest_item_by_path(unlisted_path).is_err());

    let message = book
        .get_manifest_item_by_path(unlisted_path)
        .unwrap_err()
        .to_string();
    assert_eq!(
        message,
        "Resource not found: Unable to find resource from \"EPUB/red.png\"."
    );
}
1845 }
1846
1847 mod manifest_fallbacks_tests {
1851 use std::path::Path;
1852
1853 use crate::epub::EpubDoc;
1854
#[test]
/// Opens `pub-foreign_bad-fallback.epub`, checks that the manifest items `content_001`
/// and `bar` can be fetched, and that a fallback lookup for `content_001` restricted to
/// `application/xhtml+xml` fails with the "No supported file format" message.
fn test_pub_foreign_bad_fallback() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_bad-fallback.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    for id in ["content_001", "bar"] {
        assert!(book.get_manifest_item(id).is_ok());
    }

    let outcome =
        book.get_manifest_item_with_fallback("content_001", vec!["application/xhtml+xml"]);
    let message = outcome.unwrap_err().to_string();
    assert_eq!(
        message,
        "No supported file format: The fallback resource does not contain the file format you support."
    );
}
1875
#[test]
/// Opens `pub-foreign_image.epub` and checks that a fallback lookup for `image-tiff`,
/// with `image/png` and `application/xhtml+xml` as supported formats, resolves to
/// `image/png`.
fn test_pub_foreign_image() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_image.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let supported = vec!["image/png", "application/xhtml+xml"];
    let resolved = book.get_manifest_item_with_fallback("image-tiff", supported);
    assert!(resolved.is_ok());

    // Only the media type (second tuple element) is inspected.
    let media_type = resolved.unwrap().1;
    assert_eq!(media_type, "image/png");
}
1895
#[test]
/// Opens `pub-foreign_json-spine.epub` and resolves `content_primary` twice: with JSON
/// among the supported formats the result is `application/json`; with only XHTML
/// supported the result is `application/xhtml+xml`.
fn test_pub_foreign_json_spine() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_json-spine.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // (supported formats, media type expected from the lookup)
    let cases = [
        (
            vec!["application/xhtml+xml", "application/json"],
            "application/json",
        ),
        (vec!["application/xhtml+xml"], "application/xhtml+xml"),
    ];
    for (supported, expected_mime) in cases {
        let resolved = book.get_manifest_item_with_fallback("content_primary", supported);
        assert!(resolved.is_ok());

        let (_, mime) = resolved.unwrap();
        assert_eq!(mime, expected_mime);
    }
}
1920
#[test]
/// Opens `pub-foreign_xml-spine.epub` and resolves `content_primary` twice: with
/// `application/xml` among the supported formats the result is `application/xml`; with
/// only XHTML supported the result is `application/xhtml+xml`.
fn test_pub_foreign_xml_spine() {
    let opened = EpubDoc::new(Path::new("./test_case/pub-foreign_xml-spine.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // (supported formats, media type expected from the lookup)
    let cases = [
        (
            vec!["application/xhtml+xml", "application/xml"],
            "application/xml",
        ),
        (vec!["application/xhtml+xml"], "application/xhtml+xml"),
    ];
    for (supported, expected_mime) in cases {
        let resolved = book.get_manifest_item_with_fallback("content_primary", supported);
        assert!(resolved.is_ok());

        let (_, mime) = resolved.unwrap();
        assert_eq!(mime, expected_mime);
    }
}
1945
#[test]
/// Opens `pub-foreign_xml-suffix-spine.epub` and resolves `content_primary` twice: with
/// `application/dtc+xml` among the supported formats the result is `application/dtc+xml`;
/// with only XHTML supported the result is `application/xhtml+xml`.
fn test_pub_foreign_xml_suffix_spine() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/pub-foreign_xml-suffix-spine.epub",
    ));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // (supported formats, media type expected from the lookup)
    let cases = [
        (
            vec!["application/xhtml+xml", "application/dtc+xml"],
            "application/dtc+xml",
        ),
        (vec!["application/xhtml+xml"], "application/xhtml+xml"),
    ];
    for (supported, expected_mime) in cases {
        let resolved = book.get_manifest_item_with_fallback("content_primary", supported);
        assert!(resolved.is_ok());

        let (_, mime) = resolved.unwrap();
        assert_eq!(mime, expected_mime);
    }
}
1970 }
1971
1972 mod open_container_format_tests {
1974 use std::{cmp::min, io::Read, path::Path};
1975
1976 use sha1::{Digest, Sha1};
1977
1978 use crate::epub::EpubDoc;
1979
#[test]
/// Checks that `ocf-metainf-inc.epub` opens successfully.
fn test_ocf_metainf_inc() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-metainf-inc.epub"));
    assert!(opened.is_ok());
}
1989
#[test]
/// Checks that `ocf-metainf-manifest.epub` opens successfully.
fn test_ocf_metainf_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-metainf-manifest.epub"));
    assert!(opened.is_ok());
}
1999
#[test]
/// Opens `ocf-package_arbitrary.epub` and checks that the package document path is
/// `FOO/BAR/package.opf`.
fn test_ocf_package_arbitrary() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_arbitrary.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let expected_package = Path::new("FOO/BAR/package.opf");
    assert_eq!(book.package_path, expected_package);
}
2012
#[test]
/// Opens `ocf-package_multiple.epub` and checks that the package document path is
/// `FOO/BAR/package.opf` and the base path is `FOO/BAR`.
fn test_ocf_package_multiple() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-package_multiple.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let expected_package = Path::new("FOO/BAR/package.opf");
    let expected_base = Path::new("FOO/BAR");
    assert_eq!(book.package_path, expected_package);
    assert_eq!(book.base_path, expected_base);
}
2026
#[test]
/// Checks that opening `ocf-url_link-leaking-relative.epub` fails and that the error
/// message reports the relative link that leaves the container.
fn test_ocf_url_link_leaking_relative() {
    let opened = EpubDoc::new(Path::new(
        "./test_case/ocf-url_link-leaking-relative.epub",
    ));
    assert!(opened.is_err());

    let message = opened.err().unwrap().to_string();
    let expected = String::from(
        "Relative link leakage: Path \"../../../../media/imgs/monastery.jpg\" is out of container range.",
    );
    assert_eq!(message, expected);
}
2042
#[test]
/// Opens `ocf-url_link-path-absolute.epub` and checks that the manifest entry `photo`
/// has the path `media/imgs/monastery.jpg`.
fn test_ocf_url_link_path_absolute() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-path-absolute.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let photo = book.manifest.get("photo").unwrap();
    let expected_path = Path::new("media/imgs/monastery.jpg");
    assert_eq!(photo.path, expected_path);
}
2056
#[test]
/// Opens `ocf-url_link-relative.epub` and checks that the manifest entry `photo` has the
/// path `media/imgs/monastery.jpg`.
fn test_ocf_url_link_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_link-relative.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let photo = book.manifest.get("photo").unwrap();
    let expected_path = Path::new("media/imgs/monastery.jpg");
    assert_eq!(photo.path, expected_path);
}
2070
#[test]
/// Opens `ocf-url_manifest.epub` and checks that `nav` and `content_001` can be fetched
/// while fetching `content_002` fails.
fn test_ocf_url_manifest() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_manifest.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    for retrievable in ["nav", "content_001"] {
        assert!(book.get_manifest_item(retrievable).is_ok());
    }

    let missing = book.get_manifest_item("content_002");
    assert!(missing.is_err());
}
2085
#[test]
/// Opens `ocf-url_relative.epub` and checks the package path, the base path, the
/// manifest paths of `nav` and `content_001`, and that both items can be fetched.
fn test_ocf_url_relative() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-url_relative.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    assert_eq!(book.package_path, Path::new("foo/BAR/baz.opf"));
    assert_eq!(book.base_path, Path::new("foo/BAR"));

    // (manifest id, path expected for that entry)
    let expected_paths = [
        ("nav", "foo/BAR/nav.xhtml"),
        ("content_001", "foo/BAR/qux/content_001.xhtml"),
    ];
    for (id, expected_path) in expected_paths {
        assert_eq!(
            book.manifest.get(id).unwrap().path,
            Path::new(expected_path)
        );
    }

    for id in ["nav", "content_001"] {
        assert!(book.get_manifest_item(id).is_ok());
    }
}
2109
#[test]
/// Checks that `ocf-zip-comp.epub` opens successfully.
fn test_ocf_zip_comp() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-zip-comp.epub"));
    assert!(opened.is_ok());
}
2120
#[test]
/// Checks that `ocf-zip-mult.epub` opens successfully.
fn test_ocf_zip_mult() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-zip-mult.epub"));
    assert!(opened.is_ok());
}
2131
#[test]
/// Opens `ocf-font_obfuscation.epub`, reads the single encrypted resource straight from
/// the archive, XORs its first (at most) 1040 bytes with a key built from the SHA-1
/// digest of the document's unique identifier, and checks that the result starts with a
/// signature accepted by `is_valid_font`.
fn test_ocf_font_obfuscation() {
    let epub_file = Path::new("./test_case/ocf-font_obfuscation.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    let doc = doc.unwrap();

    // Key: the SHA-1 digest of the unique identifier, repeated cyclically to 1040 bytes.
    let mut hasher = Sha1::new();
    hasher.update(doc.unique_identifier.as_bytes());
    let hash = hasher.finalize();
    let key: Vec<u8> = hash.iter().copied().cycle().take(1040).collect();

    assert!(doc.encryption.is_some());
    assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);

    let data = &doc.encryption.unwrap()[0];
    assert_eq!(data.method, "http://www.idpf.org/2008/embedding");

    // Read the raw stored bytes of the resource named by the encryption entry.
    let font_file = doc
        .archive
        .lock()
        .unwrap()
        .by_name(&data.data)
        .unwrap()
        .bytes()
        .collect::<Result<Vec<u8>, _>>();
    assert!(font_file.is_ok());

    // The bytes are owned already, so they can be modified in place without a copy.
    let mut deobfuscated = font_file.unwrap();

    // Only the leading bytes are XORed; files shorter than 1040 bytes are covered whole.
    let obfuscated_len = min(1040, deobfuscated.len());
    for (byte, mask) in deobfuscated[..obfuscated_len].iter_mut().zip(&key) {
        *byte ^= mask;
    }

    assert!(is_valid_font(&deobfuscated));
}
2177
#[test]
/// Opens `ocf-font_obfuscation_bis.epub`, reads the single encrypted resource straight
/// from the archive, XORs its first (at most) 1040 bytes with a key built from the SHA-1
/// digest of a deliberately wrong identifier, and checks that the result does NOT start
/// with a signature accepted by `is_valid_font`.
fn test_ocf_font_obfuscation_bis() {
    let epub_file = Path::new("./test_case/ocf-font_obfuscation_bis.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    let doc = doc.unwrap();

    // Key: the SHA-1 digest of the wrong identifier, repeated cyclically to 1040 bytes.
    let wrong_unique_id = "wrong-publication-id";
    let mut hasher = Sha1::new();
    hasher.update(wrong_unique_id.as_bytes());
    let hash = hasher.finalize();
    let wrong_key: Vec<u8> = hash.iter().copied().cycle().take(1040).collect();

    assert!(doc.encryption.is_some());
    assert_eq!(doc.encryption.as_ref().unwrap().len(), 1);

    let data = &doc.encryption.unwrap()[0];
    assert_eq!(data.method, "http://www.idpf.org/2008/embedding");

    // Read the raw stored bytes of the resource named by the encryption entry.
    let font_file = doc
        .archive
        .lock()
        .unwrap()
        .by_name(&data.data)
        .unwrap()
        .bytes()
        .collect::<Result<Vec<u8>, _>>();
    assert!(font_file.is_ok());

    // The bytes are owned already, so they can be modified in place without a copy.
    let mut deobfuscated_with_wrong_key = font_file.unwrap();

    // Only the leading bytes are XORed; files shorter than 1040 bytes are covered whole.
    let obfuscated_len = std::cmp::min(1040, deobfuscated_with_wrong_key.len());
    for (byte, mask) in deobfuscated_with_wrong_key[..obfuscated_len]
        .iter_mut()
        .zip(&wrong_key)
    {
        *byte ^= mask;
    }

    assert!(!is_valid_font(&deobfuscated_with_wrong_key));
}
2223
/// Returns `true` when `data` is at least four bytes long and its first four bytes equal
/// one of the accepted signatures: `OTTO`, `00 01 00 00`, `00 02 00 00`, `true`, or
/// `typ1`. Shorter inputs are rejected.
fn is_valid_font(data: &[u8]) -> bool {
    // `get(..4)` is `None` for inputs shorter than four bytes, which never match.
    matches!(
        data.get(..4),
        Some(b"OTTO" | b"\x00\x01\x00\x00" | b"\x00\x02\x00\x00" | b"true" | b"typ1")
    )
}
2237 }
2238
#[test]
/// Exercises `EpubDoc::parse_container` with three hand-written container documents:
///
/// 1. an empty `<rootfiles>` element, expected to fail with
///    `EpubError::NonCanonicalFile { tag: "rootfile" }`;
/// 2. a `<rootfile>` without `full-path`, expected to fail with
///    `EpubError::MissingRequiredAttribute` for that attribute;
/// 3. a complete `<rootfile>`, expected to return the path `EPUB/content.opf`.
fn test_parse_container() {
    // The opened document is only asserted to load; it is not used by the checks below.
    let epub_file = Path::new("./test_case/ocf-zip-mult.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    // Case 1: `<rootfiles>` contains no `<rootfile>`.
    let container = r#"
    <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
        <rootfiles></rootfiles>
    </container>
    "#
    .to_string();

    let result = EpubDoc::<BufReader<File>>::parse_container(container);
    assert!(result.is_err());
    assert_eq!(
        result.unwrap_err(),
        EpubError::NonCanonicalFile { tag: "rootfile".to_string() }
    );

    // Case 2: `<rootfile>` is present but lacks the `full-path` attribute.
    let container = r#"
    <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
        <rootfiles>
            <rootfile media-type="application/oebps-package+xml"/>
        </rootfiles>
    </container>
    "#
    .to_string();

    let result = EpubDoc::<BufReader<File>>::parse_container(container);
    assert!(result.is_err());
    assert_eq!(
        result.unwrap_err(),
        EpubError::MissingRequiredAttribute {
            tag: "rootfile".to_string(),
            attribute: "full-path".to_string(),
        }
    );

    // Case 3: a complete `<rootfile>` yields its `full-path` value as a path.
    let container = r#"
    <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
        <rootfiles>
            <rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
        </rootfiles>
    </container>
    "#
    .to_string();

    let result = EpubDoc::<BufReader<File>>::parse_container(container);
    assert!(result.is_ok());
    assert_eq!(result.unwrap(), PathBuf::from("EPUB/content.opf"))
}
2292
#[test]
/// Exercises `parse_manifest` on a document opened from `ocf-package_multiple.epub` with
/// four hand-written `<manifest>` fragments, fed in sequence to the same document:
///
/// 1. items without `id`, expected `MissingRequiredAttribute` for `id`;
/// 2. items without `href`, expected `MissingRequiredAttribute` for `href`;
/// 3. items without `media-type`, expected `MissingRequiredAttribute` for `media-type`;
/// 4. complete items, expected to parse successfully.
fn test_parse_manifest() {
    let epub_file = Path::new("./test_case/ocf-package_multiple.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    // Case 1: both items lack the `id` attribute.
    let manifest = r#"
    <manifest>
        <item href="content_001.xhtml" media-type="application/xhtml+xml"/>
        <item properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
    </manifest>
    "#;
    // The same document instance receives every `parse_manifest` call in this test.
    let mut doc = doc.unwrap();
    let element = XmlReader::parse(manifest);
    assert!(element.is_ok());

    let element = element.unwrap();
    let result = doc.parse_manifest(&element);
    assert!(result.is_err());
    assert_eq!(
        result.unwrap_err(),
        EpubError::MissingRequiredAttribute {
            tag: "item".to_string(),
            attribute: "id".to_string(),
        },
    );

    // Case 2: both items lack the `href` attribute.
    let manifest = r#"
    <manifest>
        <item id="content_001" media-type="application/xhtml+xml"/>
        <item id="nav" properties="nav" media-type="application/xhtml+xml"/>
    </manifest>
    "#;
    let element = XmlReader::parse(manifest);
    assert!(element.is_ok());

    let element = element.unwrap();
    let result = doc.parse_manifest(&element);
    assert!(result.is_err());
    assert_eq!(
        result.unwrap_err(),
        EpubError::MissingRequiredAttribute {
            tag: "item".to_string(),
            attribute: "href".to_string(),
        },
    );

    // Case 3: both items lack the `media-type` attribute.
    let manifest = r#"
    <manifest>
        <item id="content_001" href="content_001.xhtml"/>
        <item id="nav" properties="nav" href="nav.xhtml"/>
    </manifest>
    "#;
    let element = XmlReader::parse(manifest);
    assert!(element.is_ok());

    let element = element.unwrap();
    let result = doc.parse_manifest(&element);
    assert!(result.is_err());
    assert_eq!(
        result.unwrap_err(),
        EpubError::MissingRequiredAttribute {
            tag: "item".to_string(),
            attribute: "media-type".to_string(),
        },
    );

    // Case 4: every required attribute is present, so parsing succeeds.
    let manifest = r#"
    <manifest>
        <item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
        <item id="nav" properties="nav" href="nav.xhtml" media-type="application/xhtml+xml"/>
    </manifest>
    "#;
    let element = XmlReader::parse(manifest);
    assert!(element.is_ok());

    let element = element.unwrap();
    let result = doc.parse_manifest(&element);
    assert!(result.is_ok());
}
2373
#[test]
/// Opens `ocf-font_obfuscation.epub` and checks that `has_encryption` reports `true`.
fn test_fn_has_encryption() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-font_obfuscation.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let encrypted = book.has_encryption();
    assert!(encrypted);
}
2384
#[test]
/// Opens `ocf-font_obfuscation.epub` and checks the parsed encryption data: exactly one
/// entry, with method `http://www.idpf.org/2008/embedding` and data
/// `EPUB/fonts/Lobster.ttf`.
fn test_fn_parse_encryption() {
    let opened = EpubDoc::new(Path::new("./test_case/ocf-font_obfuscation.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    assert!(book.encryption.is_some());

    let entries = book.encryption.unwrap();
    assert_eq!(entries.len(), 1);

    let entry = &entries[0];
    assert_eq!(entry.method, "http://www.idpf.org/2008/embedding");
    assert_eq!(entry.data, "EPUB/fonts/Lobster.ttf");
}
2400
#[test]
/// Opens `epub-33.epub` and checks that `get_metadata` returns exactly one item for
/// `title` ("EPUB 3.3") and for `language` ("en-us"), and that `get_language` returns
/// `["en-us"]`.
fn test_get_metadata_existing_key() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    // (metadata property, the single value expected for it)
    for (property, expected_value) in [("title", "EPUB 3.3"), ("language", "en-us")] {
        let items = book.get_metadata(property);
        assert!(items.is_some());

        let items = items.unwrap();
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].property, property);
        assert_eq!(items[0].value, expected_value);
    }

    let language = book.get_language();
    assert!(language.is_ok());
    assert_eq!(language.unwrap(), vec!["en-us"]);
}
2429
#[test]
/// Opens `epub-33.epub` and checks that `get_metadata` returns `None` for the property
/// `nonexistent`.
fn test_get_metadata_nonexistent_key() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());

    let book = opened.unwrap();
    let lookup = book.get_metadata("nonexistent");
    assert!(lookup.is_none());
}
2440
#[test]
/// Opens `epub-33.epub` and checks that `get_metadata("creator")` returns three items,
/// in order, with ids `creator_id_0` to `creator_id_2`, property `creator`, and the
/// expected creator names.
fn test_get_metadata_multiple_items_same_type() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let creators = book.get_metadata("creator");
    assert!(creators.is_some());

    let creators = creators.unwrap();
    assert_eq!(creators.len(), 3);

    let expected_names = [
        "Matt Garrish, DAISY Consortium",
        "Ivan Herman, W3C",
        "Dave Cramer, Invited Expert",
    ];
    for (position, (creator, expected_name)) in
        creators.iter().zip(expected_names).enumerate()
    {
        assert_eq!(creator.id, Some(format!("creator_id_{}", position)));
        assert_eq!(creator.property, "creator");
        assert_eq!(creator.value, expected_name);
    }
}
2467
#[test]
/// Opens `epub-33.epub` and checks that the single `title` metadata item carries exactly
/// one refinement, with property `title-type` and value `main`.
fn test_get_metadata_with_refinement() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-33.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let found = book.get_metadata("title");
    assert!(found.is_some());

    let found = found.unwrap();
    assert_eq!(found.len(), 1);

    let refinements = &found[0].refined;
    assert_eq!(refinements.len(), 1);

    let refinement = &refinements[0];
    assert_eq!(refinement.property, "title-type");
    assert_eq!(refinement.value, "main");
}
2485
#[test]
/// Opens `pub-foreign_bad-fallback.epub` and checks `get_manifest_item_with_fallback`
/// for `content_001`: with `image/psd` supported the lookup succeeds and reports
/// `image/psd`; with only `application/xhtml+xml` supported it fails with the
/// "No supported file format" message.
fn test_get_manifest_item_with_fallback() {
    let epub_file = Path::new("./test_case/pub-foreign_bad-fallback.epub");
    let doc = EpubDoc::new(epub_file);
    assert!(doc.is_ok());

    let doc = doc.unwrap();
    assert!(doc.get_manifest_item("content_001").is_ok());
    assert!(doc.get_manifest_item("bar").is_ok());

    // Fail explicitly on the error branch rather than asserting on a constant `false`.
    match doc.get_manifest_item_with_fallback("content_001", vec!["image/psd"]) {
        Ok((_, mime)) => assert_eq!(mime, "image/psd"),
        Err(_) => panic!("get_manifest_item_with_fallback failed"),
    }

    assert_eq!(
        doc.get_manifest_item_with_fallback("content_001", vec!["application/xhtml+xml"])
            .unwrap_err()
            .to_string(),
        "No supported file format: The fallback resource does not contain the file format you support."
    );
}
2512
#[test]
/// Opens `pkg-cover-image.epub` (printing the error if opening fails) and checks that
/// `get_cover` returns 5785 bytes with media type `image/jpeg`.
fn test_get_cover() {
    let opened = EpubDoc::new(Path::new("./test_case/pkg-cover-image.epub"));
    // Print the failure reason before the assertion below reports the failure.
    if let Some(err) = opened.as_ref().err() {
        println!("{}", err);
    }
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let cover = book.get_cover();
    assert!(cover.is_some());

    let (bytes, media_type) = cover.unwrap();
    assert_eq!(bytes.len(), 5785);
    assert_eq!(media_type, "image/jpeg");
}
2530
#[test]
/// Opens `epub-2.epub` and checks that `get_title` returns `["Minimal EPUB 2.0"]`.
fn test_epub_2() {
    let opened = EpubDoc::new(Path::new("./test_case/epub-2.epub"));
    assert!(opened.is_ok());
    let book = opened.unwrap();

    let titles = book.get_title();
    assert!(titles.is_ok());

    let expected_titles = vec!["Minimal EPUB 2.0"];
    assert_eq!(titles.unwrap(), expected_titles);
}
2543
#[test]
/// Checks that `EpubDoc::is_valid_epub` returns `Ok(true)` for `epub-2.epub`.
fn test_is_valid_epub_valid_file() {
    let result = EpubDoc::is_valid_epub("./test_case/epub-2.epub");
    assert!(result.is_ok());
    // Assert the boolean directly instead of comparing it with `true`.
    assert!(result.unwrap());
}
2550
#[test]
/// Checks that `EpubDoc::is_valid_epub` returns an error for `nonexistent.epub`.
fn test_is_valid_epub_invalid_path() {
    let missing_path = "./test_case/nonexistent.epub";
    assert!(EpubDoc::is_valid_epub(missing_path).is_err());
}
2556
#[test]
/// Writes a file that is not a ZIP archive into the system temp directory and checks
/// that `EpubDoc::is_valid_epub` fails on it with `EpubError::ArchiveError`.
fn test_is_valid_epub_corrupted_zip() {
    // Include the process id so concurrent test runs do not overwrite each other's file.
    let corrupted_file =
        std::env::temp_dir().join(format!("corrupted-{}.epub", std::process::id()));

    std::fs::write(&corrupted_file, b"not a valid zip file").unwrap();

    let result = EpubDoc::is_valid_epub(&corrupted_file);

    // Remove the file before asserting so a failed assertion cannot leave it behind.
    // Removal is best effort; its outcome does not affect the test result.
    std::fs::remove_file(&corrupted_file).ok();

    assert!(result.is_err());
    let err = result.unwrap_err();
    assert!(matches!(err, EpubError::ArchiveError { .. }));
}
2572
#[test]
/// Checks that `EpubDoc::is_valid_epub` returns `Ok(true)` for `epub-33.epub`.
fn test_is_valid_epub_valid_epub_3() {
    let result = EpubDoc::is_valid_epub("./test_case/epub-33.epub");
    assert!(result.is_ok());
    // Assert the boolean directly instead of comparing it with `true`.
    assert!(result.unwrap());
}
2579
#[test]
/// Checks `is_outside_error`: it returns `true` for `ArchiveError` and `IOError`, and
/// `false` for `NonCanonicalEpub` and `MissingRequiredAttribute`.
fn test_is_outside_error() {
    type FileDoc = EpubDoc<BufReader<File>>;

    // Variants expected to be classified as outside errors.
    let outside = [
        EpubError::ArchiveError {
            source: zip::result::ZipError::Io(std::io::Error::new(
                std::io::ErrorKind::Other,
                "test",
            )),
        },
        EpubError::IOError {
            source: std::io::Error::new(std::io::ErrorKind::NotFound, "test"),
        },
    ];
    for error in &outside {
        assert!(FileDoc::is_outside_error(error));
    }

    // Variants expected NOT to be classified as outside errors.
    let not_outside = [
        EpubError::NonCanonicalEpub { expected_file: "test".to_string() },
        EpubError::MissingRequiredAttribute {
            tag: "test".to_string(),
            attribute: "id".to_string(),
        },
    ];
    for error in &not_outside {
        assert!(!FileDoc::is_outside_error(error));
    }
}
2606}