1use std::{
2 cmp,
3 collections::HashMap,
4 io::{Read, Seek},
5 path::PathBuf,
6};
7
8#[cfg(feature = "builder")]
9use chrono::Local;
10use quick_xml::{NsReader, events::Event};
11use sha1::{Digest, Sha1};
12use zip::{CompressionMethod, ZipArchive};
13
14use crate::error::EpubError;
15
16#[cfg(feature = "builder")]
/// Local names of the elements that live in the DC namespace
/// (presumably the Dublin Core `dc:` elements used in EPUB metadata).
///
/// The list is built on first access and is kept in alphabetical order.
pub static ELEMENT_IN_DC_NAMESPACE: std::sync::LazyLock<Vec<&str>> =
    std::sync::LazyLock::new(|| {
        const DC_ELEMENTS: [&str; 15] = [
            "contributor",
            "coverage",
            "creator",
            "date",
            "description",
            "format",
            "identifier",
            "language",
            "publisher",
            "relation",
            "rights",
            "source",
            "subject",
            "title",
            "type",
        ];

        DC_ELEMENTS.to_vec()
    });
37
38#[cfg(feature = "builder")]
39pub fn local_time() -> String {
41 Local::now().format("%Y-%m-%dT%H-%M-%S.%fU%z").to_string()
42}
43
44pub fn get_file_in_zip_archive<R: Read + Seek>(
64 zip_file: &mut ZipArchive<R>,
65 file_name: &str,
66) -> Result<Vec<u8>, EpubError> {
67 let mut buffer = Vec::<u8>::new();
68 match zip_file.by_name(file_name) {
69 Ok(mut file) => {
70 let _ = file.read_to_end(&mut buffer).map_err(EpubError::from)?;
71 Ok(buffer)
72 }
73 Err(err) => Err(EpubError::from(err)),
74 }
75}
76
77pub fn compression_method_check<R: Read + Seek>(
97 zip_archive: &mut ZipArchive<R>,
98) -> Result<(), EpubError> {
99 for index in 0..zip_archive.len() {
100 let file = zip_archive.by_index(index)?;
101
102 match file.compression() {
103 CompressionMethod::Stored | CompressionMethod::Deflated => continue,
104 method => {
105 return Err(EpubError::UnusableCompressionMethod {
106 file: file.name().to_string(),
107 method: method.to_string(),
108 });
109 }
110 };
111 }
112
113 Ok(())
114}
115
/// Resolves a relative link against a directory inside the EPUB container and
/// reports whether it stays inside the container.
///
/// Every `"../"` found in `check_file` moves one directory up from
/// `epub_path.join(current_dir)`; the text after the last `"../"` is then
/// appended to whatever remains of the path below `epub_path`.
///
/// Note that all occurrences of `"../"` are counted, not only leading ones, so
/// a link such as `a/../b` is treated as one level up followed by `b`.
///
/// ## Parameters
/// - `epub_path`: Root of the container
/// - `current_dir`: Directory of the referencing file, joined onto `epub_path`
/// - `check_file`: The relative link to check
///
/// ## Return
/// - `Some(String)`: The link expressed relative to `epub_path`, using `/` as
///   the separator between the directory part and the remainder
/// - `None`: The link climbs above `epub_path`, or the resulting directory is
///   not valid UTF-8 (previously this case panicked)
pub fn check_realtive_link_leakage(
    epub_path: PathBuf,
    current_dir: PathBuf,
    check_file: &str,
) -> Option<String> {
    // Splitting on "../" yields one more piece than there are separators, so the
    // number of matches is the number of levels to climb and the last piece is
    // what is left of the link.
    let folder_depth = check_file.matches("../").count();
    let remaining = check_file.rsplit("../").next().unwrap_or_default();

    let mut current_path = epub_path.join(current_dir);
    for _ in 0..folder_depth {
        if !current_path.pop() {
            return None;
        }
    }

    // Leaving the container makes `strip_prefix` fail; a non-UTF-8 directory makes
    // `to_str` fail. Both are reported as `None` instead of panicking.
    let prefix_path = current_path.strip_prefix(&epub_path).ok()?.to_str()?;

    let path = if prefix_path.is_empty() {
        remaining.to_string()
    } else {
        format!("{}/{}", prefix_path, remaining)
    };
    Some(path)
}
165
166#[cfg(feature = "builder")]
/// Returns `path` without its leading `/`, if it has one.
///
/// Paths that do not start with `/` are returned unchanged.
pub fn remove_leading_slash<P: AsRef<std::path::Path>>(path: P) -> PathBuf {
    let original = path.as_ref();

    original
        .strip_prefix("/")
        .unwrap_or(original)
        .to_path_buf()
}
177
178pub fn idpf_font_encryption(data: &[u8], key: &str) -> Vec<u8> {
196 if data.is_empty() {
197 return Vec::new();
198 }
199
200 let hash = {
201 let mut hasher = Sha1::new();
202 hasher.update(key.as_bytes());
203 hasher.finalize()
204 };
205
206 let mut obfuscated_data = data.to_vec();
207 let limit = cmp::min(1040, data.len());
208
209 for (index, byte) in obfuscated_data.iter_mut().take(limit).enumerate() {
210 *byte ^= hash[index % hash.len()]
211 }
212
213 obfuscated_data
214}
215
216pub fn idpf_font_dencryption(data: &[u8], key: &str) -> Vec<u8> {
229 idpf_font_encryption(data, key)
230}
231
/// Obfuscates font data by XOR-ing its first 1024 bytes with the bytes of
/// `key`, repeating the key as often as needed.
///
/// Bytes past the first 1024 are copied unchanged. Because XOR is its own
/// inverse, applying the function twice with the same key restores the input.
///
/// ## Parameters
/// - `data`: The font bytes to obfuscate
/// - `key`: The text whose UTF-8 bytes are used as the XOR mask
///
/// ## Return
/// A new buffer of the same length as `data`. When `key` is empty there is
/// nothing to XOR with, so the data is returned unchanged (previously an empty
/// key caused a remainder-by-zero panic).
pub fn adobe_font_encryption(data: &[u8], key: &str) -> Vec<u8> {
    let mut obfuscated_data = data.to_vec();

    let key_bytes = key.as_bytes();
    if key_bytes.is_empty() {
        return obfuscated_data;
    }

    let limit = cmp::min(1024, obfuscated_data.len());

    // Pair each byte of the masked prefix with the key byte at the same
    // position modulo the key length.
    for (byte, mask) in obfuscated_data[..limit]
        .iter_mut()
        .zip(key_bytes.iter().cycle())
    {
        *byte ^= *mask;
    }

    obfuscated_data
}

/// Reverses [`adobe_font_encryption`].
///
/// The obfuscation is a plain XOR, so decoding is the very same operation as
/// encoding; this function only exists to make call sites read naturally.
pub fn adobe_font_dencryption(data: &[u8], key: &str) -> Vec<u8> {
    adobe_font_encryption(data, key)
}
281
282pub trait DecodeBytes {
297 fn decode(&self) -> Result<String, EpubError>;
298}
299
300impl DecodeBytes for Vec<u8> {
301 fn decode(&self) -> Result<String, EpubError> {
302 if self.is_empty() || self.len() < 4 {
303 return Err(EpubError::EmptyDataError);
304 }
305
306 match self.as_slice() {
307 [0xEF, 0xBB, 0xBF, rest @ ..] => {
309 String::from_utf8(rest.to_vec()).map_err(EpubError::from)
310 }
311
312 [0xFE, 0xFF, rest @ ..] => {
314 let utf16_units = rest
315 .chunks_exact(2)
316 .map(|b| u16::from_be_bytes([b[0], b[1]]))
317 .collect::<Vec<u16>>();
318
319 String::from_utf16(&utf16_units).map_err(EpubError::from)
320 }
321
322 [0xFF, 0xFE, rest @ ..] => {
324 let utf16_units = rest
325 .chunks_exact(2)
326 .map(|b| u16::from_le_bytes([b[0], b[1]]))
327 .collect::<Vec<u16>>();
328
329 String::from_utf16(&utf16_units).map_err(EpubError::from)
330 }
331
332 _ => {
336 let lossless = String::from_utf8_lossy(self);
340 if !lossless.contains('\u{FFFD}') {
341 return Ok(lossless.into_owned());
342 }
343
344 if self.len() % 2 == 0 {
345 if let Ok(str) = String::from_utf16(
347 &self
348 .chunks_exact(2)
349 .map(|b| u16::from_be_bytes([b[0], b[1]]))
350 .collect::<Vec<u16>>(),
351 ) {
352 return Ok(str);
353 }
354
355 if let Ok(str) = String::from_utf16(
357 &self
358 .chunks_exact(2)
359 .map(|b| u16::from_le_bytes([b[0], b[1]]))
360 .collect::<Vec<u16>>(),
361 ) {
362 return Ok(str);
363 }
364 }
365
366 Ok(String::from_utf8_lossy(self).to_string())
368 }
369 }
370 }
371}
372
/// Whitespace normalization for text.
pub trait NormalizeWhitespace {
    /// Returns the text with leading and trailing whitespace removed and every
    /// inner run of whitespace replaced by a single space.
    fn normalize_whitespace(&self) -> String;
}

impl NormalizeWhitespace for &str {
    fn normalize_whitespace(&self) -> String {
        // `split_whitespace` already skips empty pieces, so joining the words with
        // one space yields the normalized text.
        self.split_whitespace().collect::<Vec<&str>>().join(" ")
    }
}

impl NormalizeWhitespace for String {
    fn normalize_whitespace(&self) -> String {
        self.as_str().normalize_whitespace()
    }
}
407
408#[derive(Debug)]
410pub struct XmlElement {
411 pub name: String,
413
414 pub prefix: Option<String>,
416
417 pub namespace: Option<String>,
419
420 pub attributes: HashMap<String, String>,
424
425 pub text: Option<String>,
427
428 pub cdata: Option<String>,
430
431 pub children: Vec<XmlElement>,
433}
434
435impl XmlElement {
436 pub fn new(name: String) -> Self {
438 Self {
439 name,
440 prefix: None,
441 namespace: None,
442 attributes: HashMap::new(),
443 text: None,
444 cdata: None,
445 children: Vec::new(),
446 }
447 }
448
449 pub fn tag_name(&self) -> String {
454 match &self.prefix {
455 Some(prefix) => format!("{}:{}", prefix, self.name),
456 None => self.name.clone(),
457 }
458 }
459
460 pub fn text(&self) -> String {
465 let mut result = String::new();
466
467 if let Some(text_value) = &self.text {
468 result.push_str(text_value);
469 }
470
471 for child in &self.children {
472 result.push_str(&child.text());
473 }
474
475 result.trim().to_string()
476 }
477
478 pub fn get_attr(&self, name: &str) -> Option<String> {
480 self.attributes.get(name).cloned()
481 }
482
483 pub fn find_elements_by_name(&self, name: &str) -> impl Iterator<Item = &XmlElement> {
485 SearchElementsByNameIter::new(self, name)
486 }
487
488 pub fn find_children_by_name(&self, name: &str) -> impl Iterator<Item = &XmlElement> {
490 self.children.iter().filter(move |child| child.name == name)
491 }
492
493 pub fn find_children_by_names(&self, names: &[&str]) -> impl Iterator<Item = &XmlElement> {
495 self.children
496 .iter()
497 .filter(move |child| names.contains(&child.name.as_str()))
498 }
499
500 pub fn children(&self) -> impl Iterator<Item = &XmlElement> {
502 self.children.iter()
503 }
504}
505
506struct SearchElementsByNameIter<'a> {
507 elements: Vec<&'a XmlElement>,
508 current_index: usize,
509 target_name: String,
510}
511
512impl<'a> SearchElementsByNameIter<'a> {
513 fn new(root: &'a XmlElement, name: &str) -> Self {
514 let mut elements = Vec::new();
515 Self::collect_elements(root, &mut elements);
516 Self {
517 elements,
518 current_index: 0,
519 target_name: name.to_string(),
520 }
521 }
522
523 fn collect_elements(element: &'a XmlElement, collection: &mut Vec<&'a XmlElement>) {
524 collection.push(element);
525 for child in &element.children {
526 Self::collect_elements(child, collection);
527 }
528 }
529}
530
531impl<'a> Iterator for SearchElementsByNameIter<'a> {
532 type Item = &'a XmlElement;
533
534 fn next(&mut self) -> Option<Self::Item> {
535 while self.current_index < self.elements.len() {
536 let element = self.elements[self.current_index];
537 self.current_index += 1;
538 if element.name == self.target_name {
539 return Some(element);
540 }
541 }
542 None
543 }
544}
545
546pub struct XmlReader {}
548
549impl XmlReader {
551 pub fn parse(content: &str) -> Result<XmlElement, EpubError> {
563 if content.is_empty() {
564 return Err(EpubError::EmptyDataError);
565 }
566
567 let mut reader = NsReader::from_str(content);
569 reader.config_mut().trim_text(true);
570
571 let mut buf = Vec::new();
572 let mut stack = Vec::<XmlElement>::new();
573 let mut root = None;
574 let mut namespace_map = HashMap::new();
575
576 loop {
578 match reader.read_event_into(&mut buf) {
579 Ok(Event::Eof) => break,
581
582 Ok(Event::Start(e)) => {
584 let name = String::from_utf8_lossy(e.local_name().as_ref()).to_string();
585 let mut element = XmlElement::new(name);
586
587 if let Some(prefix) = e.name().prefix() {
588 element.prefix = Some(String::from_utf8_lossy(prefix.as_ref()).to_string());
589 }
590
591 for attr in e.attributes().flatten() {
592 let attr_key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
593 let attr_value = String::from_utf8_lossy(&attr.value).to_string();
594
595 if attr_key.contains("xmlns") {
597 let attr_keys = attr_key.split(":").collect::<Vec<&str>>();
598 if attr_keys.len() >= 2 {
599 namespace_map.insert(attr_keys[1].to_string(), attr_value);
600 } else {
601 namespace_map.insert(attr_key, attr_value);
602 }
603
604 continue;
605 }
606
607 element.attributes.insert(attr_key, attr_value);
608 }
609
610 stack.push(element);
611 }
612
613 Ok(Event::End(_)) => {
615 if let Some(element) = stack.pop() {
616 if stack.is_empty() {
619 root = Some(element);
620 } else if let Some(parent) = stack.last_mut() {
621 parent.children.push(element);
624 }
625 }
626 }
627
628 Ok(Event::Empty(e)) => {
630 let name = String::from_utf8_lossy(e.local_name().as_ref()).to_string();
631 let mut element = XmlElement::new(name);
632
633 if let Some(prefix) = e.name().prefix() {
634 element.prefix = Some(String::from_utf8_lossy(prefix.as_ref()).to_string());
635 }
636
637 for attr in e.attributes().flatten() {
638 let attr_key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
639 let attr_value = String::from_utf8_lossy(&attr.value).to_string();
640
641 if attr_key.contains("xmlns") {
642 let attr_keys = attr_key.split(":").collect::<Vec<&str>>();
643 if attr_keys.len() >= 2 {
644 namespace_map.insert(attr_keys[1].to_string(), attr_value);
645 } else {
646 namespace_map.insert(attr_key, attr_value);
647 }
648
649 continue;
650 }
651
652 element.attributes.insert(attr_key, attr_value);
653 }
654
655 if let Some(parent) = stack.last_mut() {
658 parent.children.push(element);
659 }
660 }
661
662 Ok(Event::Text(e)) => {
664 if let Some(element) = stack.last_mut() {
665 let text = String::from_utf8_lossy(e.as_ref()).to_string();
666 if !text.trim().is_empty() {
667 element.text = Some(text);
668 }
669 }
670 }
671
672 Ok(Event::CData(e)) => {
674 if let Some(element) = stack.last_mut() {
675 element.cdata = Some(String::from_utf8_lossy(e.as_ref()).to_string());
676 }
677 }
678
679 Err(err) => return Err(err.into()),
680
681 _ => continue,
684 }
685 }
686
687 if let Some(element) = root.as_mut() {
688 Self::assign_namespace(element, &namespace_map);
689 }
690
691 root.ok_or(EpubError::EmptyDataError)
693 }
694
695 fn assign_namespace(element: &mut XmlElement, namespace_map: &HashMap<String, String>) {
707 if let Some(prefix) = &element.prefix {
708 if let Some(namespace) = namespace_map.get(prefix) {
709 element.namespace = Some(namespace.clone());
710 }
711 } else if let Some(namespace) = namespace_map.get("xmlns") {
712 element.namespace = Some(namespace.clone());
713 }
714
715 for chiled in element.children.iter_mut() {
716 Self::assign_namespace(chiled, namespace_map);
717 }
718 }
719}
720
#[cfg(test)]
mod tests {
    use crate::{
        error::EpubError,
        utils::{
            DecodeBytes, NormalizeWhitespace, adobe_font_dencryption, adobe_font_encryption,
            idpf_font_dencryption, idpf_font_encryption,
        },
    };

    /// An empty buffer cannot be decoded.
    #[test]
    fn test_decode_empty_data() {
        let bytes: Vec<u8> = Vec::new();

        assert_eq!(bytes.decode().unwrap_err(), EpubError::EmptyDataError);
    }

    /// A buffer shorter than four bytes is rejected like an empty one.
    #[test]
    fn test_decode_short_data() {
        let bytes: Vec<u8> = vec![0xEF, 0xBB];

        assert_eq!(bytes.decode().unwrap_err(), EpubError::EmptyDataError);
    }

    /// The UTF-8 byte order mark is recognised and removed.
    #[test]
    fn test_decode_utf8_with_bom() {
        let bytes: Vec<u8> = vec![0xEF, 0xBB, 0xBF, b'H', b'e', b'l', b'l', b'o'];

        assert_eq!(bytes.decode().unwrap(), "Hello");
    }

    /// The UTF-16 big-endian byte order mark selects big-endian decoding.
    #[test]
    fn test_decode_utf16_be_with_bom() {
        let bytes: Vec<u8> = vec![
            0xFE, 0xFF, // byte order mark
            0x00, b'H', 0x00, b'e', 0x00, b'l', 0x00, b'l', 0x00, b'o',
        ];

        assert_eq!(bytes.decode().unwrap(), "Hello");
    }

    /// The UTF-16 little-endian byte order mark selects little-endian decoding.
    #[test]
    fn test_decode_utf16_le_with_bom() {
        let bytes: Vec<u8> = vec![
            0xFF, 0xFE, // byte order mark
            b'H', 0x00, b'e', 0x00, b'l', 0x00, b'l', 0x00, b'o', 0x00,
        ];

        assert_eq!(bytes.decode().unwrap(), "Hello");
    }

    /// Plain UTF-8 without a byte order mark is decoded as is.
    #[test]
    fn test_decode_plain_utf8() {
        let bytes = b"Hello, World!".to_vec();

        assert_eq!(bytes.decode().unwrap(), "Hello, World!");
    }

    /// Both the `&str` and the `String` implementation collapse whitespace.
    #[test]
    fn test_normalize_whitespace_trait() {
        let borrowed = "  Hello,\tWorld!\n\nRust  ";
        assert_eq!(borrowed.normalize_whitespace(), "Hello, World! Rust");

        let owned = String::from("  Hello,\tWorld!\n\nRust  ");
        assert_eq!(owned.normalize_whitespace(), "Hello, World! Rust");
    }

    #[test]
    fn test_idpf_font_encryption_empty_data() {
        let input: Vec<u8> = Vec::new();

        assert!(idpf_font_encryption(&input, "test-key").is_empty());
    }

    #[test]
    fn test_idpf_font_encryption_data_less_than_1040() {
        let input = vec![0x01, 0x02, 0x03, 0x04, 0x05];
        let key = "test-key";

        let round_trip = idpf_font_dencryption(&idpf_font_encryption(&input, key), key);

        assert_eq!(round_trip, input);
    }

    /// Only the first 1040 bytes are altered; the tail is passed through.
    #[test]
    fn test_idpf_font_encryption_data_greater_than_1040() {
        let input = (0..2048).map(|i| i as u8).collect::<Vec<_>>();
        let key = "test-key-12345";

        let obfuscated = idpf_font_encryption(&input, key);
        let restored = idpf_font_dencryption(&obfuscated, key);

        assert_eq!(restored, input);
        assert_ne!(&obfuscated[..1040], &input[..1040]);
        assert_eq!(&obfuscated[1040..], &input[1040..]);
    }

    #[test]
    fn test_idpf_font_encryption_decryption_inverse() {
        let input = b"Test font data for IDPF encryption verification".to_vec();
        let key = "epub-id-12345";

        let obfuscated = idpf_font_encryption(&input, key);
        let restored = idpf_font_dencryption(&obfuscated, key);

        assert_eq!(restored, input);
        assert_ne!(obfuscated, input);
    }

    #[test]
    fn test_idpf_font_encryption_different_keys_produce_different_results() {
        let input = b"Same data for all keys test".to_vec();

        let with_first_key = idpf_font_encryption(&input, "key-one");
        let with_second_key = idpf_font_encryption(&input, "key-two");

        assert_ne!(with_first_key, with_second_key);
    }

    /// Encrypting twice with the same key yields the original data.
    #[test]
    fn test_idpf_font_encryption_same_key_twice_reverses() {
        let input = b"Double encryption test data".to_vec();
        let key = "reversible-key";

        let applied_twice = idpf_font_encryption(&idpf_font_encryption(&input, key), key);

        assert_eq!(applied_twice, input);
    }

    #[test]
    fn test_adobe_font_encryption_empty_data() {
        let input: Vec<u8> = Vec::new();

        assert!(adobe_font_encryption(&input, "test-key-123456").is_empty());
    }

    #[test]
    fn test_adobe_font_encryption_data_less_than_1024() {
        let input = vec![0x10, 0x20, 0x30, 0x40, 0x50];
        let key = "1234567890123456";

        let round_trip = adobe_font_dencryption(&adobe_font_encryption(&input, key), key);

        assert_eq!(round_trip, input);
    }

    /// Only the first 1024 bytes are altered; the tail is passed through.
    #[test]
    fn test_adobe_font_encryption_data_greater_than_1024() {
        let input: Vec<u8> = (0..2048).map(|i| i as u8).collect();
        let key = "adobe-key-16byte";

        let obfuscated = adobe_font_encryption(&input, key);
        let restored = adobe_font_dencryption(&obfuscated, key);

        assert_eq!(restored, input);
        assert_ne!(&obfuscated[..1024], &input[..1024]);
        assert_eq!(&obfuscated[1024..], &input[1024..]);
    }

    #[test]
    fn test_adobe_font_encryption_decryption_inverse() {
        let input = b"Test font data for Adobe encryption verification".to_vec();
        let key = "1234567890123456";

        let obfuscated = adobe_font_encryption(&input, key);
        let restored = adobe_font_dencryption(&obfuscated, key);

        assert_eq!(restored, input);
        assert_ne!(obfuscated, input);
    }

    #[test]
    fn test_adobe_font_encryption_different_keys_produce_different_results() {
        let input = b"Same data for all keys test".to_vec();

        let with_first_key = adobe_font_encryption(&input, "1234567890123456");
        let with_second_key = adobe_font_encryption(&input, "abcdefghijklmnop");

        assert_ne!(with_first_key, with_second_key);
    }

    /// Encrypting twice with the same key yields the original data.
    #[test]
    fn test_adobe_font_encryption_same_key_twice_reverses() {
        let input = b"Double encryption test data".to_vec();
        let key = "1234567890123456";

        let applied_twice = adobe_font_encryption(&adobe_font_encryption(&input, key), key);

        assert_eq!(applied_twice, input);
    }

    /// A key shorter than the data is repeated and still round-trips.
    #[test]
    fn test_adobe_font_encryption_key_length_handling() {
        let input = b"Test data".to_vec();
        let key = "short";

        let round_trip = adobe_font_dencryption(&adobe_font_encryption(&input, key), key);

        assert_eq!(round_trip, input);
    }
}