1extern crate alloc;
7
8use alloc::format;
9use alloc::string::{String, ToString};
10use alloc::vec::Vec;
11use quick_xml::events::Event;
12use quick_xml::reader::Reader;
13
14use crate::error::EpubError;
15
/// Upper bound on the number of manifest `<item>` entries collected while parsing
/// an OPF; items encountered after the limit is reached are not stored.
const MAX_MANIFEST_ITEMS: usize = 1024;

/// Upper bound on the number of `subject` values kept in `EpubMetadata::subjects`.
const MAX_SUBJECTS: usize = 64;

/// Upper bound on the number of guide `<reference>` entries kept in
/// `EpubMetadata::guide`.
const MAX_GUIDE_REFS: usize = 64;
24
/// One `<item>` entry read from the OPF manifest.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ManifestItem {
    /// Value of the item's `id` attribute.
    pub id: String,
    /// Value of the item's `href` attribute, stored exactly as decoded from the OPF.
    pub href: String,
    /// Value of the item's `media-type` attribute.
    pub media_type: String,
    /// Value of the item's `properties` attribute, when the attribute is present.
    pub properties: Option<String>,
}
37
/// One `<reference>` entry read from the OPF `<guide>` section.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct GuideRef {
    /// Value of the reference's `type` attribute (e.g. `cover`, `toc`).
    pub guide_type: String,
    /// Value of the reference's `title` attribute, when present.
    pub title: Option<String>,
    /// Value of the reference's `href` attribute.
    pub href: String,
}
48
/// Metadata gathered from an EPUB's OPF package document (plus the OPF location
/// taken from `container.xml` when parsed through `extract_metadata`).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct EpubMetadata {
    /// Text of the `title` metadata element; empty when none was found.
    pub title: String,
    /// Text of the `creator` metadata element; empty when none was found.
    pub author: String,
    /// Text of the `language` metadata element; defaults to `"en"`.
    pub language: String,
    /// Manifest items in document order (capped at `MAX_MANIFEST_ITEMS`).
    pub manifest: Vec<ManifestItem>,
    /// Manifest id of the cover, taken from `<meta name="cover" content="…">` or
    /// from a manifest item whose `properties` mention `cover-image`.
    pub cover_id: Option<String>,

    /// Text of the `date` metadata element.
    pub date: Option<String>,
    /// Text of the `publisher` metadata element.
    pub publisher: Option<String>,
    /// Text of the `rights` metadata element.
    pub rights: Option<String>,
    /// Text of the `description` metadata element.
    pub description: Option<String>,
    /// Texts of the `subject` metadata elements (capped at `MAX_SUBJECTS`).
    pub subjects: Vec<String>,
    /// Text of the `identifier` metadata element.
    pub identifier: Option<String>,

    /// Text of `<meta property="dcterms:modified">`.
    pub modified: Option<String>,
    /// Text of `<meta property="rendition:layout">`.
    pub rendition_layout: Option<String>,

    /// Guide references in document order (capped at `MAX_GUIDE_REFS`).
    pub guide: Vec<GuideRef>,

    /// `full-path` of the OPF as declared in `container.xml`; only filled in by
    /// the `extract_metadata*` helpers, `None` after a bare OPF parse.
    pub opf_path: Option<String>,
}
91
92impl Default for EpubMetadata {
93 fn default() -> Self {
94 Self {
95 title: String::with_capacity(0),
96 author: String::with_capacity(0),
97 language: String::from("en"),
98 manifest: Vec::with_capacity(0),
99 cover_id: None,
100 date: None,
101 publisher: None,
102 rights: None,
103 description: None,
104 subjects: Vec::with_capacity(0),
105 identifier: None,
106 modified: None,
107 rendition_layout: None,
108 guide: Vec::with_capacity(0),
109 opf_path: None,
110 }
111 }
112}
113
114impl EpubMetadata {
115 pub fn new() -> Self {
117 Self::default()
118 }
119
120 pub fn get_item(&self, id: &str) -> Option<&ManifestItem> {
122 self.manifest.iter().find(|item| item.id == id)
123 }
124
125 pub fn get_cover_item(&self) -> Option<&ManifestItem> {
127 self.cover_id.as_ref().and_then(|id| self.get_item(id))
128 }
129
130 pub fn find_item_by_href(&self, href: &str) -> Option<&str> {
132 self.manifest
133 .iter()
134 .find(|item| item.href == href)
135 .map(|item| item.id.as_str())
136 }
137}
138
/// Returns the local part of an attribute name, i.e. the text after the last `:`
/// (delegates to `local_name`, so `xlink:href` yields `href`).
fn local_attr_name(name: &str) -> &str {
    local_name(name)
}
142
/// Decides whether an image reference found in cover XHTML is usable as a cover
/// path.
///
/// After trimming surrounding whitespace, the reference is rejected when it is
/// empty, is a pure fragment (`#…`), or uses the `data:` scheme (compared
/// case-insensitively). Everything else is accepted.
fn is_supported_cover_ref(href: &str) -> bool {
    const DATA_SCHEME: &[u8] = b"data:";

    let candidate = href.trim();
    if candidate.is_empty() || candidate.starts_with('#') {
        return false;
    }

    // Compare the leading bytes without building a lowercase copy of the string.
    let is_data_uri = candidate
        .as_bytes()
        .get(..DATA_SCHEME.len())
        .is_some_and(|scheme| scheme.eq_ignore_ascii_case(DATA_SCHEME));

    !is_data_uri
}
151
152pub fn extract_cover_image_href_from_xhtml(content: &[u8]) -> Option<String> {
161 let mut reader = Reader::from_reader(content);
162 reader.config_mut().trim_text(true);
163
164 let mut buf = Vec::with_capacity(0);
165 loop {
166 match reader.read_event_into(&mut buf) {
167 Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
168 let name = reader.decoder().decode(e.name().as_ref()).ok()?.to_string();
169 let local = local_name(&name);
170 if local != "img" && local != "image" {
171 buf.clear();
172 continue;
173 }
174
175 for attr in e.attributes() {
176 let attr = attr.ok()?;
177 let key = reader.decoder().decode(attr.key.as_ref()).ok()?.to_string();
178 let key_local = local_attr_name(&key);
179 if key_local != "src" && key_local != "href" {
180 continue;
181 }
182 let value = reader.decoder().decode(&attr.value).ok()?.to_string();
183 if is_supported_cover_ref(&value) {
184 return Some(value);
185 }
186 }
187 }
188 Ok(Event::Eof) => break,
189 Err(_) => return None,
190 _ => {}
191 }
192 buf.clear();
193 }
194 None
195}
196
197#[cfg(not(feature = "std"))]
201pub fn parse_container_xml(content: &[u8]) -> Result<String, EpubError> {
202 let mut reader = Reader::from_reader(content);
203 reader.config_mut().trim_text(true);
204
205 let mut buf = Vec::with_capacity(0);
206 let mut opf_path: Option<String> = None;
207
208 loop {
209 match reader.read_event_into(&mut buf) {
210 Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
211 let name = reader
212 .decoder()
213 .decode(e.name().as_ref())
214 .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?
215 .to_string();
216 let local = local_name(&name);
217
218 if local == "rootfile" {
219 for attr in e.attributes() {
221 let attr =
222 attr.map_err(|e| EpubError::Parse(format!("Attr error: {:?}", e)))?;
223 let key = reader
224 .decoder()
225 .decode(attr.key.as_ref())
226 .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?;
227 if key == "full-path" {
228 let value = reader
229 .decoder()
230 .decode(&attr.value)
231 .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?
232 .to_string();
233 opf_path = Some(value);
234 break;
235 }
236 }
237 }
238 }
239 Ok(Event::Eof) => break,
240 Err(e) => return Err(EpubError::Parse(format!("XML parse error: {:?}", e))),
241 _ => {}
242 }
243 buf.clear();
244 }
245
246 opf_path.ok_or_else(|| EpubError::InvalidEpub("No rootfile found in container.xml".into()))
247}
248
/// Parses `META-INF/container.xml` from any buffered reader and returns the OPF
/// location.
///
/// The result is the `full-path` attribute of the **first** `rootfile` element
/// that carries one. OCF designates the first rootfile as the default rendition,
/// so later `rootfile` elements must not override it. The rest of the document is
/// still read, so malformed trailing XML is reported as an error.
///
/// # Errors
///
/// * `EpubError::Parse` when the XML is malformed or a name/attribute cannot be
///   decoded.
/// * `EpubError::InvalidEpub` when no `rootfile` with a `full-path` is present.
#[cfg(feature = "std")]
fn parse_container_xml_reader<R: std::io::BufRead>(reader: R) -> Result<String, EpubError> {
    let mut reader = Reader::from_reader(reader);
    reader.config_mut().trim_text(true);

    let mut buf = Vec::new();
    let mut opf_path: Option<String> = None;

    loop {
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
                let name = reader
                    .decoder()
                    .decode(e.name().as_ref())
                    .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?
                    .to_string();
                let local = local_name(&name);

                // Once a path has been found, later rootfiles (alternate
                // renditions) are ignored so the first one wins.
                if opf_path.is_none() && local == "rootfile" {
                    for attr in e.attributes() {
                        let attr =
                            attr.map_err(|e| EpubError::Parse(format!("Attr error: {:?}", e)))?;
                        let key = reader
                            .decoder()
                            .decode(attr.key.as_ref())
                            .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?;
                        if key == "full-path" {
                            let value = reader
                                .decoder()
                                .decode(&attr.value)
                                .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?
                                .to_string();
                            opf_path = Some(value);
                            break;
                        }
                    }
                }
            }
            Ok(Event::Eof) => break,
            Err(e) => return Err(EpubError::Parse(format!("XML parse error: {:?}", e))),
            _ => {}
        }
        buf.clear();
    }

    opf_path.ok_or_else(|| EpubError::InvalidEpub("No rootfile found in container.xml".into()))
}
296
/// Parses `container.xml` held in memory and returns the OPF `full-path`
/// (`std` build). Thin wrapper that hands the byte slice to
/// `parse_container_xml_reader`; errors are whatever that function returns.
#[cfg(feature = "std")]
pub fn parse_container_xml(content: &[u8]) -> Result<String, EpubError> {
    parse_container_xml_reader(content)
}
304
/// Strips any namespace prefix from a qualified XML name.
///
/// Returns the text after the last `:` (`dc:title` -> `title`); a name without a
/// colon is returned unchanged.
fn local_name(name: &str) -> &str {
    match name.rsplit_once(':') {
        Some((_prefix, local)) => local,
        None => name,
    }
}
308
309#[cfg(not(feature = "std"))]
313pub fn parse_opf(content: &[u8]) -> Result<EpubMetadata, EpubError> {
314 let mut reader = Reader::from_reader(content);
315 reader.config_mut().trim_text(true);
316
317 let mut buf = Vec::with_capacity(0);
318 let mut metadata = EpubMetadata::new();
319
320 let mut current_element: Option<String> = None;
322 let mut in_metadata = false;
323 let mut in_manifest = false;
324 let mut in_spine = false;
325 let mut in_guide = false;
326 let mut current_meta_property: Option<String> = None;
327
328 loop {
329 match reader.read_event_into(&mut buf) {
330 Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
331 let name = reader
332 .decoder()
333 .decode(e.name().as_ref())
334 .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?
335 .to_string();
336
337 let local = local_name(&name);
338 match local {
340 "metadata" => in_metadata = true,
341 "manifest" => in_manifest = true,
342 "spine" => in_spine = true,
343 "guide" => in_guide = true,
344 _ => {}
345 }
346
347 if in_manifest && local == "item" && metadata.manifest.len() < MAX_MANIFEST_ITEMS {
349 if let Some(item) = parse_manifest_item(&e, &reader)? {
350 if item
352 .properties
353 .as_ref()
354 .is_some_and(|p| p.contains("cover-image"))
355 {
356 metadata.cover_id = Some(item.id.clone());
357 }
358 metadata.manifest.push(item);
359 }
360 }
361
362 if in_metadata {
364 current_element = Some(local.to_string());
365
366 if local == "meta" {
368 let mut name_attr = None;
369 let mut content_attr = None;
370 let mut property_attr = None;
371
372 for attr in e.attributes() {
373 let attr =
374 attr.map_err(|e| EpubError::Parse(format!("Attr error: {:?}", e)))?;
375 let key = reader
376 .decoder()
377 .decode(attr.key.as_ref())
378 .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?;
379 let value = reader
380 .decoder()
381 .decode(&attr.value)
382 .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?;
383
384 if key == "name" && value == "cover" {
385 name_attr = Some(value.to_string());
386 }
387 if key == "content" {
388 content_attr = Some(value.to_string());
389 }
390 if key == "property" {
391 property_attr = Some(value.to_string());
392 }
393 }
394
395 if name_attr.is_some() && content_attr.is_some() {
396 metadata.cover_id = content_attr;
397 }
398
399 current_meta_property = property_attr;
401 }
402 }
403
404 if in_guide && local == "reference" && metadata.guide.len() < MAX_GUIDE_REFS {
406 if let Some(guide_ref) = parse_guide_reference(&e, &reader)? {
407 metadata.guide.push(guide_ref);
408 }
409 }
410
411 if in_spine && local == "itemref" {
413 }
416 }
417 Ok(Event::Text(e)) => {
418 if let Some(ref elem) = current_element {
419 let text = reader
420 .decoder()
421 .decode(&e)
422 .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?
423 .to_string();
424
425 if elem == "meta" {
427 if let Some(ref prop) = current_meta_property {
428 match prop.as_str() {
429 "dcterms:modified" => {
430 metadata.modified = Some(text.clone());
431 }
432 "rendition:layout" => {
433 metadata.rendition_layout = Some(text.clone());
434 }
435 _ => {}
436 }
437 }
438 }
439
440 match elem.as_str() {
442 "title" => {
443 metadata.title = text;
444 }
445 "creator" => {
446 metadata.author = text;
447 }
448 "language" => {
449 metadata.language = text;
450 }
451 "date" => {
452 metadata.date = Some(text);
453 }
454 "publisher" => {
455 metadata.publisher = Some(text);
456 }
457 "rights" => {
458 metadata.rights = Some(text);
459 }
460 "description" => {
461 metadata.description = Some(text);
462 }
463 "subject" => {
464 if metadata.subjects.len() < MAX_SUBJECTS {
465 metadata.subjects.push(text);
466 }
467 }
468 "identifier" => {
469 metadata.identifier = Some(text);
470 }
471 _ => {}
472 }
473 }
474 }
475 Ok(Event::End(e)) => {
476 let name = reader
477 .decoder()
478 .decode(e.name().as_ref())
479 .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?
480 .to_string();
481
482 match local_name(&name) {
483 "metadata" => in_metadata = false,
484 "manifest" => in_manifest = false,
485 "spine" => in_spine = false,
486 "guide" => in_guide = false,
487 _ => {}
488 }
489
490 current_element = None;
491 current_meta_property = None;
492 }
493 Ok(Event::Eof) => break,
494 Err(e) => return Err(EpubError::Parse(format!("XML parse error: {:?}", e))),
495 _ => {}
496 }
497 buf.clear();
498 }
499
500 Ok(metadata)
501}
502
/// Parses an OPF package document from any buffered reader.
///
/// Collected data:
/// * text of the `title`, `creator`, `language`, `date`, `publisher`, `rights`,
///   `description`, `subject` and `identifier` children of `<metadata>`
///   (namespace prefixes are ignored; for single-valued fields the last value
///   wins);
/// * `<meta name="cover" content="…">` and manifest items whose `properties`
///   list contains the `cover-image` token, both of which set `cover_id`;
/// * `<meta property="dcterms:modified">` and `<meta property="rendition:layout">`;
/// * manifest `<item>`s and guide `<reference>`s, capped by `MAX_MANIFEST_ITEMS`
///   and `MAX_GUIDE_REFS`.
///
/// `opf_path` is left as `None`.
///
/// # Errors
///
/// Returns `EpubError::Parse` when the XML is malformed or a name, attribute or
/// text node cannot be decoded.
#[cfg(feature = "std")]
fn parse_opf_reader<R: std::io::BufRead>(reader: R) -> Result<EpubMetadata, EpubError> {
    let mut reader = Reader::from_reader(reader);
    reader.config_mut().trim_text(true);

    let mut buf = Vec::new();
    let mut metadata = EpubMetadata::new();

    // Local name of the metadata child most recently opened; routes text nodes.
    let mut current_element: Option<String> = None;
    // Section flags, toggled by the corresponding start/end tags.
    // NOTE(review): a self-closing section tag (e.g. `<manifest/>`) arrives as
    // `Event::Empty` and sets its flag, but no `End` event is handled for it in
    // this loop, so the flag appears to stay set — confirm this is harmless.
    let mut in_metadata = false;
    let mut in_manifest = false;
    let mut in_spine = false;
    let mut in_guide = false;
    // `property` attribute of the most recent `<meta>`; consumed by its text.
    let mut current_meta_property: Option<String> = None;

    loop {
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
                let name = reader
                    .decoder()
                    .decode(e.name().as_ref())
                    .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?
                    .to_string();

                let local = local_name(&name);
                match local {
                    "metadata" => in_metadata = true,
                    "manifest" => in_manifest = true,
                    "spine" => in_spine = true,
                    "guide" => in_guide = true,
                    _ => {}
                }

                if in_manifest && local == "item" && metadata.manifest.len() < MAX_MANIFEST_ITEMS {
                    if let Some(item) = parse_manifest_item_reader(&e, &reader)? {
                        // `properties` is a whitespace-separated token list, so
                        // match the exact token rather than a substring (which
                        // would also hit e.g. `x-cover-image-alt`).
                        let is_cover_image = item.properties.as_deref().is_some_and(|props| {
                            props
                                .split_ascii_whitespace()
                                .any(|token| token == "cover-image")
                        });
                        if is_cover_image {
                            metadata.cover_id = Some(item.id.clone());
                        }
                        metadata.manifest.push(item);
                    }
                }

                if in_metadata {
                    current_element = Some(local.to_string());

                    if local == "meta" {
                        let mut name_attr = None;
                        let mut content_attr = None;
                        let mut property_attr = None;

                        for attr in e.attributes() {
                            let attr =
                                attr.map_err(|e| EpubError::Parse(format!("Attr error: {:?}", e)))?;
                            let key = reader
                                .decoder()
                                .decode(attr.key.as_ref())
                                .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?;
                            let value = reader
                                .decoder()
                                .decode(&attr.value)
                                .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?;

                            if key == "name" && value == "cover" {
                                name_attr = Some(value.to_string());
                            }
                            if key == "content" {
                                content_attr = Some(value.to_string());
                            }
                            if key == "property" {
                                property_attr = Some(value.to_string());
                            }
                        }

                        // `<meta name="cover" content="ID"/>`: `content` is the
                        // manifest id of the cover.
                        if name_attr.is_some() && content_attr.is_some() {
                            metadata.cover_id = content_attr;
                        }

                        current_meta_property = property_attr;
                    }
                }

                if in_guide && local == "reference" && metadata.guide.len() < MAX_GUIDE_REFS {
                    if let Some(guide_ref) = parse_guide_reference_reader(&e, &reader)? {
                        metadata.guide.push(guide_ref);
                    }
                }

                // Spine `itemref`s are recognised but not collected here.
                if in_spine && local == "itemref" {}
            }
            Ok(Event::Text(e)) => {
                if let Some(ref elem) = current_element {
                    let text = reader
                        .decoder()
                        .decode(&e)
                        .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?
                        .to_string();

                    match elem.as_str() {
                        "title" => metadata.title = text,
                        "creator" => metadata.author = text,
                        "language" => metadata.language = text,
                        "date" => metadata.date = Some(text),
                        "publisher" => metadata.publisher = Some(text),
                        "rights" => metadata.rights = Some(text),
                        "description" => metadata.description = Some(text),
                        "subject" => {
                            if metadata.subjects.len() < MAX_SUBJECTS {
                                metadata.subjects.push(text);
                            }
                        }
                        "identifier" => metadata.identifier = Some(text),
                        "meta" => {
                            // The property is consumed by the first text node.
                            if let Some(property) = current_meta_property.take() {
                                if property == "dcterms:modified" {
                                    metadata.modified = Some(text);
                                } else if property == "rendition:layout" {
                                    metadata.rendition_layout = Some(text);
                                }
                            }
                        }
                        _ => {}
                    }
                }
            }
            Ok(Event::End(e)) => {
                let name = reader
                    .decoder()
                    .decode(e.name().as_ref())
                    .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?
                    .into_owned();
                match local_name(&name) {
                    "metadata" => in_metadata = false,
                    "manifest" => in_manifest = false,
                    "spine" => in_spine = false,
                    "guide" => in_guide = false,
                    _ => {}
                }
                // Any closing tag ends the current text target.
                current_element = None;
            }
            Ok(Event::Eof) => break,
            Err(e) => return Err(EpubError::Parse(format!("XML parse error: {:?}", e))),
            _ => {}
        }
        buf.clear();
    }

    Ok(metadata)
}
655
/// Parses an OPF package document held in memory (`std` build). Thin wrapper
/// that hands the byte slice to `parse_opf_reader`; errors are whatever that
/// function returns.
#[cfg(feature = "std")]
pub fn parse_opf(content: &[u8]) -> Result<EpubMetadata, EpubError> {
    parse_opf_reader(content)
}
661
662#[cfg(not(feature = "std"))]
664fn parse_manifest_item<'a>(
665 e: &quick_xml::events::BytesStart<'a>,
666 reader: &Reader<&[u8]>,
667) -> Result<Option<ManifestItem>, EpubError> {
668 let mut id = None;
669 let mut href = None;
670 let mut media_type = None;
671 let mut properties = None;
672
673 for attr in e.attributes() {
674 let attr = attr.map_err(|e| EpubError::Parse(format!("Attr error: {:?}", e)))?;
675 let key = reader
676 .decoder()
677 .decode(attr.key.as_ref())
678 .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?;
679 let value = reader
680 .decoder()
681 .decode(&attr.value)
682 .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?
683 .to_string();
684
685 match key.as_ref() {
686 "id" => id = Some(value),
687 "href" => href = Some(value),
688 "media-type" => media_type = Some(value),
689 "properties" => properties = Some(value),
690 _ => {}
691 }
692 }
693
694 if let (Some(id), Some(href), Some(media_type)) = (id, href, media_type) {
695 Ok(Some(ManifestItem {
696 id,
697 href,
698 media_type,
699 properties,
700 }))
701 } else {
702 Ok(None) }
704}
705
706#[cfg(not(feature = "std"))]
708fn parse_guide_reference<'a>(
709 e: &quick_xml::events::BytesStart<'a>,
710 reader: &Reader<&[u8]>,
711) -> Result<Option<GuideRef>, EpubError> {
712 let mut guide_type = None;
713 let mut title = None;
714 let mut href = None;
715
716 for attr in e.attributes() {
717 let attr = attr.map_err(|e| EpubError::Parse(format!("Attr error: {:?}", e)))?;
718 let key = reader
719 .decoder()
720 .decode(attr.key.as_ref())
721 .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?;
722 let value = reader
723 .decoder()
724 .decode(&attr.value)
725 .map_err(|e| EpubError::Parse(format!("Decode error: {:?}", e)))?
726 .to_string();
727
728 match key.as_ref() {
729 "type" => guide_type = Some(value),
730 "title" => title = Some(value),
731 "href" => href = Some(value),
732 _ => {}
733 }
734 }
735
736 if let (Some(guide_type), Some(href)) = (guide_type, href) {
737 Ok(Some(GuideRef {
738 guide_type,
739 title,
740 href,
741 }))
742 } else {
743 Ok(None) }
745}
746
/// Builds a `ManifestItem` from the attributes of a manifest `<item>` tag
/// (generic buffered reader, `std` build).
///
/// Returns `Ok(None)` when any of `id`, `href` or `media-type` is missing;
/// `properties` is optional. Unknown attributes are ignored.
///
/// # Errors
///
/// `EpubError::Parse` when an attribute is malformed or cannot be decoded.
#[cfg(feature = "std")]
fn parse_manifest_item_reader<R: std::io::BufRead>(
    e: &quick_xml::events::BytesStart<'_>,
    reader: &Reader<R>,
) -> Result<Option<ManifestItem>, EpubError> {
    let mut id: Option<String> = None;
    let mut href: Option<String> = None;
    let mut media_type: Option<String> = None;
    let mut properties: Option<String> = None;

    for attribute in e.attributes() {
        let attribute =
            attribute.map_err(|err| EpubError::Parse(format!("Attr error: {:?}", err)))?;
        let attr_name = reader
            .decoder()
            .decode(attribute.key.as_ref())
            .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?;
        let attr_value = reader
            .decoder()
            .decode(&attribute.value)
            .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?
            .to_string();

        // Pick the destination for this attribute; skip anything unrecognised.
        let slot = match attr_name.as_ref() {
            "id" => &mut id,
            "href" => &mut href,
            "media-type" => &mut media_type,
            "properties" => &mut properties,
            _ => continue,
        };
        *slot = Some(attr_value);
    }

    match (id, href, media_type) {
        (Some(id), Some(href), Some(media_type)) => Ok(Some(ManifestItem {
            id,
            href,
            media_type,
            properties,
        })),
        _ => Ok(None),
    }
}
789
/// Builds a `GuideRef` from the attributes of a guide `<reference>` tag
/// (generic buffered reader, `std` build).
///
/// Returns `Ok(None)` when `type` or `href` is missing; `title` is optional.
/// Unknown attributes are ignored.
///
/// # Errors
///
/// `EpubError::Parse` when an attribute is malformed or cannot be decoded.
#[cfg(feature = "std")]
fn parse_guide_reference_reader<R: std::io::BufRead>(
    e: &quick_xml::events::BytesStart<'_>,
    reader: &Reader<R>,
) -> Result<Option<GuideRef>, EpubError> {
    let mut guide_type: Option<String> = None;
    let mut title: Option<String> = None;
    let mut href: Option<String> = None;

    for attribute in e.attributes() {
        let attribute =
            attribute.map_err(|err| EpubError::Parse(format!("Attr error: {:?}", err)))?;
        let attr_name = reader
            .decoder()
            .decode(attribute.key.as_ref())
            .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?;
        let attr_value = reader
            .decoder()
            .decode(&attribute.value)
            .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?
            .to_string();

        // Pick the destination for this attribute; skip anything unrecognised.
        let slot = match attr_name.as_ref() {
            "type" => &mut guide_type,
            "title" => &mut title,
            "href" => &mut href,
            _ => continue,
        };
        *slot = Some(attr_value);
    }

    match (guide_type, href) {
        (Some(guide_type), Some(href)) => Ok(Some(GuideRef {
            guide_type,
            title,
            href,
        })),
        _ => Ok(None),
    }
}
829
830pub fn extract_metadata(
836 container_xml: &[u8],
837 opf_content: &[u8],
838) -> Result<EpubMetadata, EpubError> {
839 let opf_path = parse_container_xml(container_xml)?;
841
842 let mut metadata = parse_opf(opf_content)?;
844
845 metadata.opf_path = Some(opf_path);
848
849 Ok(metadata)
850}
851
/// Opens `container.xml` at `path` and returns the OPF `full-path` it declares.
///
/// # Errors
///
/// `EpubError::Io` when the file cannot be opened; otherwise whatever
/// `parse_container_xml_reader` reports.
#[cfg(feature = "std")]
pub fn parse_container_xml_file<P: AsRef<std::path::Path>>(path: P) -> Result<String, EpubError> {
    match std::fs::File::open(path) {
        Ok(handle) => parse_container_xml_reader(std::io::BufReader::new(handle)),
        Err(e) => Err(EpubError::Io(format!(
            "Failed to open container.xml: {}",
            e
        ))),
    }
}
860
/// Opens the OPF file at `path` and parses it into `EpubMetadata`.
///
/// # Errors
///
/// `EpubError::Io` when the file cannot be opened; otherwise whatever
/// `parse_opf_reader` reports.
#[cfg(feature = "std")]
pub fn parse_opf_file<P: AsRef<std::path::Path>>(path: P) -> Result<EpubMetadata, EpubError> {
    match std::fs::File::open(path) {
        Ok(handle) => parse_opf_reader(std::io::BufReader::new(handle)),
        Err(e) => Err(EpubError::Io(format!("Failed to open OPF: {}", e))),
    }
}
869
/// Reads `container.xml` and the OPF document from disk and combines them.
///
/// The container file is parsed first to obtain the declared OPF `full-path`;
/// the OPF at `opf_path` is then parsed and the declared path is stored in the
/// result's `opf_path` field. Both paths must be of the same type `P`.
///
/// # Errors
///
/// Propagates the error of whichever step fails first.
#[cfg(feature = "std")]
pub fn extract_metadata_from_files<P: AsRef<std::path::Path>>(
    container_path: P,
    opf_path: P,
) -> Result<EpubMetadata, EpubError> {
    let declared_opf_path = parse_container_xml_file(container_path)?;

    parse_opf_file(opf_path).map(|mut parsed| {
        parsed.opf_path = Some(declared_opf_path);
        parsed
    })
}
890
891#[cfg(test)]
892mod tests {
893 use super::*;
894
895 #[test]
896 fn test_parse_container_xml() {
897 let container = br#"<?xml version="1.0"?>
898<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
899 <rootfiles>
900 <rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
901 </rootfiles>
902</container>"#;
903
904 let result = parse_container_xml(container).unwrap();
905 assert_eq!(result, "EPUB/package.opf");
906 }
907
908 #[test]
909 fn test_parse_opf_basic() {
910 let opf = br#"<?xml version="1.0"?>
911<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
912 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
913 <dc:title>Test Book</dc:title>
914 <dc:creator>Test Author</dc:creator>
915 <dc:language>en</dc:language>
916 </metadata>
917 <manifest>
918 <item id="cover" href="cover.xhtml" media-type="application/xhtml+xml"/>
919 <item id="chapter1" href="chapter1.xhtml" media-type="application/xhtml+xml"/>
920 </manifest>
921</package>"#;
922
923 let metadata = parse_opf(opf).unwrap();
924 assert_eq!(metadata.title, "Test Book");
925 assert_eq!(metadata.author, "Test Author");
926 assert_eq!(metadata.language, "en");
927 assert_eq!(metadata.manifest.len(), 2);
928 assert_eq!(metadata.manifest[0].id, "cover");
929 assert_eq!(metadata.manifest[1].href, "chapter1.xhtml");
930 }
931
932 #[test]
933 fn test_parse_opf_with_cover() {
934 let opf = br#"<?xml version="1.0"?>
935<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
936 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
937 <dc:title>Book with Cover</dc:title>
938 <meta name="cover" content="cover-image"/>
939 </metadata>
940 <manifest>
941 <item id="cover-image" href="images/cover.jpg" media-type="image/jpeg" properties="cover-image"/>
942 </manifest>
943</package>"#;
944
945 let metadata = parse_opf(opf).unwrap();
946 assert_eq!(metadata.title, "Book with Cover");
947 assert_eq!(metadata.cover_id, Some("cover-image".to_string()));
948 }
949
950 #[test]
951 fn test_get_item() {
952 let mut metadata = EpubMetadata::new();
953 metadata.manifest.push(ManifestItem {
954 id: "item1".to_string(),
955 href: "chapter1.xhtml".to_string(),
956 media_type: "application/xhtml+xml".to_string(),
957 properties: None,
958 });
959
960 let item = metadata.get_item("item1");
961 assert!(item.is_some());
962 assert_eq!(item.unwrap().href, "chapter1.xhtml");
963
964 assert!(metadata.get_item("nonexistent").is_none());
965 }
966
967 #[test]
968 fn test_parse_opf_dublin_core_date() {
969 let opf = br#"<?xml version="1.0"?>
970<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
971 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
972 <dc:title>Test Book</dc:title>
973 <dc:date>2024-01-15</dc:date>
974 </metadata>
975 <manifest/>
976</package>"#;
977
978 let metadata = parse_opf(opf).unwrap();
979 assert_eq!(metadata.date, Some("2024-01-15".to_string()));
980 }
981
982 #[test]
983 fn test_parse_opf_dublin_core_publisher() {
984 let opf = br#"<?xml version="1.0"?>
985<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
986 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
987 <dc:title>Test Book</dc:title>
988 <dc:publisher>Acme Publishing</dc:publisher>
989 </metadata>
990 <manifest/>
991</package>"#;
992
993 let metadata = parse_opf(opf).unwrap();
994 assert_eq!(metadata.publisher, Some("Acme Publishing".to_string()));
995 }
996
997 #[test]
998 fn test_parse_opf_dublin_core_rights() {
999 let opf = br#"<?xml version="1.0"?>
1000<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1001 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1002 <dc:title>Test Book</dc:title>
1003 <dc:rights>Copyright 2024 Author</dc:rights>
1004 </metadata>
1005 <manifest/>
1006</package>"#;
1007
1008 let metadata = parse_opf(opf).unwrap();
1009 assert_eq!(metadata.rights, Some("Copyright 2024 Author".to_string()));
1010 }
1011
1012 #[test]
1013 fn test_parse_opf_dublin_core_description() {
1014 let opf = br#"<?xml version="1.0"?>
1015<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1016 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1017 <dc:title>Test Book</dc:title>
1018 <dc:description>A fascinating story about testing parsers.</dc:description>
1019 </metadata>
1020 <manifest/>
1021</package>"#;
1022
1023 let metadata = parse_opf(opf).unwrap();
1024 assert_eq!(
1025 metadata.description,
1026 Some("A fascinating story about testing parsers.".to_string())
1027 );
1028 }
1029
1030 #[test]
1031 fn test_parse_opf_dublin_core_identifier() {
1032 let opf = br#"<?xml version="1.0"?>
1033<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1034 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1035 <dc:title>Test Book</dc:title>
1036 <dc:identifier>urn:isbn:978-3-16-148410-0</dc:identifier>
1037 </metadata>
1038 <manifest/>
1039</package>"#;
1040
1041 let metadata = parse_opf(opf).unwrap();
1042 assert_eq!(
1043 metadata.identifier,
1044 Some("urn:isbn:978-3-16-148410-0".to_string())
1045 );
1046 }
1047
1048 #[test]
1049 fn test_parse_opf_single_subject() {
1050 let opf = br#"<?xml version="1.0"?>
1051<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1052 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1053 <dc:title>Test Book</dc:title>
1054 <dc:subject>Fiction</dc:subject>
1055 </metadata>
1056 <manifest/>
1057</package>"#;
1058
1059 let metadata = parse_opf(opf).unwrap();
1060 assert_eq!(metadata.subjects, vec!["Fiction".to_string()]);
1061 }
1062
1063 #[test]
1064 fn test_parse_opf_multiple_subjects() {
1065 let opf = br#"<?xml version="1.0"?>
1066<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1067 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1068 <dc:title>Test Book</dc:title>
1069 <dc:subject>Fiction</dc:subject>
1070 <dc:subject>Science Fiction</dc:subject>
1071 <dc:subject>Adventure</dc:subject>
1072 </metadata>
1073 <manifest/>
1074</package>"#;
1075
1076 let metadata = parse_opf(opf).unwrap();
1077 assert_eq!(metadata.subjects.len(), 3);
1078 assert_eq!(metadata.subjects[0], "Fiction");
1079 assert_eq!(metadata.subjects[1], "Science Fiction");
1080 assert_eq!(metadata.subjects[2], "Adventure");
1081 }
1082
1083 #[test]
1084 fn test_parse_opf_modified_date() {
1085 let opf = br#"<?xml version="1.0"?>
1086<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1087 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1088 <dc:title>Test Book</dc:title>
1089 <meta property="dcterms:modified">2024-06-01T12:00:00Z</meta>
1090 </metadata>
1091 <manifest/>
1092</package>"#;
1093
1094 let metadata = parse_opf(opf).unwrap();
1095 assert_eq!(metadata.modified, Some("2024-06-01T12:00:00Z".to_string()));
1096 }
1097
1098 #[test]
1099 fn test_parse_opf_rendition_layout() {
1100 let opf = br#"<?xml version="1.0"?>
1101<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1102 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1103 <dc:title>Test Book</dc:title>
1104 <meta property="rendition:layout">pre-paginated</meta>
1105 </metadata>
1106 <manifest/>
1107</package>"#;
1108
1109 let metadata = parse_opf(opf).unwrap();
1110 assert_eq!(metadata.rendition_layout, Some("pre-paginated".to_string()));
1111 }
1112
1113 #[test]
1114 fn test_parse_opf_rendition_layout_reflowable() {
1115 let opf = br#"<?xml version="1.0"?>
1116<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1117 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1118 <dc:title>Test Book</dc:title>
1119 <meta property="rendition:layout">reflowable</meta>
1120 </metadata>
1121 <manifest/>
1122</package>"#;
1123
1124 let metadata = parse_opf(opf).unwrap();
1125 assert_eq!(metadata.rendition_layout, Some("reflowable".to_string()));
1126 }
1127
1128 #[test]
1129 fn test_parse_opf_guide_single_reference() {
1130 let opf = br#"<?xml version="1.0"?>
1131<package xmlns="http://www.idpf.org/2007/opf" version="2.0">
1132 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1133 <dc:title>Test Book</dc:title>
1134 </metadata>
1135 <manifest/>
1136 <guide>
1137 <reference type="cover" title="Cover" href="cover.xhtml"/>
1138 </guide>
1139</package>"#;
1140
1141 let metadata = parse_opf(opf).unwrap();
1142 assert_eq!(metadata.guide.len(), 1);
1143 assert_eq!(metadata.guide[0].guide_type, "cover");
1144 assert_eq!(metadata.guide[0].title, Some("Cover".to_string()));
1145 assert_eq!(metadata.guide[0].href, "cover.xhtml");
1146 }
1147
1148 #[test]
1149 fn test_parse_opf_guide_multiple_references() {
1150 let opf = br#"<?xml version="1.0"?>
1151<package xmlns="http://www.idpf.org/2007/opf" version="2.0">
1152 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1153 <dc:title>Test Book</dc:title>
1154 </metadata>
1155 <manifest/>
1156 <guide>
1157 <reference type="cover" title="Cover" href="cover.xhtml"/>
1158 <reference type="toc" title="Table of Contents" href="toc.xhtml"/>
1159 <reference type="text" title="Beginning" href="chapter1.xhtml"/>
1160 </guide>
1161</package>"#;
1162
1163 let metadata = parse_opf(opf).unwrap();
1164 assert_eq!(metadata.guide.len(), 3);
1165 assert_eq!(metadata.guide[0].guide_type, "cover");
1166 assert_eq!(metadata.guide[0].href, "cover.xhtml");
1167 assert_eq!(metadata.guide[1].guide_type, "toc");
1168 assert_eq!(
1169 metadata.guide[1].title,
1170 Some("Table of Contents".to_string())
1171 );
1172 assert_eq!(metadata.guide[1].href, "toc.xhtml");
1173 assert_eq!(metadata.guide[2].guide_type, "text");
1174 assert_eq!(metadata.guide[2].href, "chapter1.xhtml");
1175 }
1176
1177 #[test]
1178 fn test_parse_opf_guide_without_title() {
1179 let opf = br#"<?xml version="1.0"?>
1180<package xmlns="http://www.idpf.org/2007/opf" version="2.0">
1181 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1182 <dc:title>Test Book</dc:title>
1183 </metadata>
1184 <manifest/>
1185 <guide>
1186 <reference type="cover" href="cover.xhtml"/>
1187 </guide>
1188</package>"#;
1189
1190 let metadata = parse_opf(opf).unwrap();
1191 assert_eq!(metadata.guide.len(), 1);
1192 assert_eq!(metadata.guide[0].guide_type, "cover");
1193 assert_eq!(metadata.guide[0].title, None);
1194 assert_eq!(metadata.guide[0].href, "cover.xhtml");
1195 }
1196
1197 #[test]
1198 fn test_parse_opf_empty_optional_fields() {
1199 let opf = br#"<?xml version="1.0"?>
1200<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1201 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1202 <dc:title>Minimal Book</dc:title>
1203 <dc:creator>Author</dc:creator>
1204 <dc:language>en</dc:language>
1205 </metadata>
1206 <manifest/>
1207</package>"#;
1208
1209 let metadata = parse_opf(opf).unwrap();
1210 assert_eq!(metadata.title, "Minimal Book");
1211 assert_eq!(metadata.author, "Author");
1212 assert_eq!(metadata.language, "en");
1213 assert_eq!(metadata.date, None);
1215 assert_eq!(metadata.publisher, None);
1216 assert_eq!(metadata.rights, None);
1217 assert_eq!(metadata.description, None);
1218 assert!(metadata.subjects.is_empty());
1219 assert_eq!(metadata.identifier, None);
1220 assert_eq!(metadata.modified, None);
1221 assert_eq!(metadata.rendition_layout, None);
1222 assert!(metadata.guide.is_empty());
1223 }
1224
1225 #[test]
1226 fn test_parse_opf_all_dublin_core_fields() {
1227 let opf = br#"<?xml version="1.0"?>
1228<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1229 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1230 <dc:title>Complete Book</dc:title>
1231 <dc:creator>Jane Doe</dc:creator>
1232 <dc:language>fr</dc:language>
1233 <dc:date>2023-03-20</dc:date>
1234 <dc:publisher>Example Press</dc:publisher>
1235 <dc:rights>All rights reserved</dc:rights>
1236 <dc:description>A comprehensive test book.</dc:description>
1237 <dc:subject>Testing</dc:subject>
1238 <dc:subject>Software</dc:subject>
1239 <dc:identifier>urn:uuid:12345678-1234-1234-1234-123456789abc</dc:identifier>
1240 <meta property="dcterms:modified">2023-06-15T10:30:00Z</meta>
1241 <meta property="rendition:layout">reflowable</meta>
1242 </metadata>
1243 <manifest>
1244 <item id="ch1" href="chapter1.xhtml" media-type="application/xhtml+xml"/>
1245 </manifest>
1246 <guide>
1247 <reference type="toc" title="Contents" href="toc.xhtml"/>
1248 </guide>
1249</package>"#;
1250
1251 let metadata = parse_opf(opf).unwrap();
1252 assert_eq!(metadata.title, "Complete Book");
1253 assert_eq!(metadata.author, "Jane Doe");
1254 assert_eq!(metadata.language, "fr");
1255 assert_eq!(metadata.date, Some("2023-03-20".to_string()));
1256 assert_eq!(metadata.publisher, Some("Example Press".to_string()));
1257 assert_eq!(metadata.rights, Some("All rights reserved".to_string()));
1258 assert_eq!(
1259 metadata.description,
1260 Some("A comprehensive test book.".to_string())
1261 );
1262 assert_eq!(metadata.subjects, vec!["Testing", "Software"]);
1263 assert_eq!(
1264 metadata.identifier,
1265 Some("urn:uuid:12345678-1234-1234-1234-123456789abc".to_string())
1266 );
1267 assert_eq!(metadata.modified, Some("2023-06-15T10:30:00Z".to_string()));
1268 assert_eq!(metadata.rendition_layout, Some("reflowable".to_string()));
1269 assert_eq!(metadata.manifest.len(), 1);
1270 assert_eq!(metadata.guide.len(), 1);
1271 assert_eq!(metadata.guide[0].guide_type, "toc");
1272 }
1273
1274 #[test]
1275 fn test_parse_opf_backward_compat_basic() {
1276 let opf = br#"<?xml version="1.0"?>
1278<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1279 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1280 <dc:title>Test Book</dc:title>
1281 <dc:creator>Test Author</dc:creator>
1282 <dc:language>en</dc:language>
1283 </metadata>
1284 <manifest>
1285 <item id="cover" href="cover.xhtml" media-type="application/xhtml+xml"/>
1286 <item id="chapter1" href="chapter1.xhtml" media-type="application/xhtml+xml"/>
1287 </manifest>
1288</package>"#;
1289
1290 let metadata = parse_opf(opf).unwrap();
1291 assert_eq!(metadata.title, "Test Book");
1292 assert_eq!(metadata.author, "Test Author");
1293 assert_eq!(metadata.language, "en");
1294 assert_eq!(metadata.manifest.len(), 2);
1295 assert_eq!(metadata.date, None);
1297 assert!(metadata.subjects.is_empty());
1298 assert!(metadata.guide.is_empty());
1299 }
1300
1301 #[test]
1302 fn test_extract_metadata_uses_container_xml_path() {
1303 let container = br#"<?xml version="1.0"?>
1304<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
1305 <rootfiles>
1306 <rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
1307 </rootfiles>
1308</container>"#;
1309
1310 let opf = br#"<?xml version="1.0"?>
1311<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1312 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1313 <dc:title>Test Book</dc:title>
1314 <dc:creator>Test Author</dc:creator>
1315 <dc:language>en</dc:language>
1316 </metadata>
1317 <manifest/>
1318</package>"#;
1319
1320 let metadata = extract_metadata(container, opf).unwrap();
1321 assert_eq!(metadata.title, "Test Book");
1322 assert_eq!(metadata.opf_path, Some("EPUB/package.opf".to_string()));
1323 }
1324
1325 #[test]
1326 fn test_extract_metadata_different_rootfile_path() {
1327 let container = br#"<?xml version="1.0"?>
1328<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
1329 <rootfiles>
1330 <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
1331 </rootfiles>
1332</container>"#;
1333
1334 let opf = br#"<?xml version="1.0"?>
1335<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
1336 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1337 <dc:title>Another Book</dc:title>
1338 <dc:creator>Another Author</dc:creator>
1339 <dc:language>fr</dc:language>
1340 </metadata>
1341 <manifest/>
1342</package>"#;
1343
1344 let metadata = extract_metadata(container, opf).unwrap();
1345 assert_eq!(metadata.title, "Another Book");
1346 assert_eq!(metadata.opf_path, Some("OEBPS/content.opf".to_string()));
1347 }
1348
1349 #[test]
1350 fn test_extract_cover_image_href_from_xhtml_img_src() {
1351 let xhtml = br#"<?xml version="1.0"?>
1352<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'>
1353<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
1354 <body>
1355 <div style="text-align: center">
1356 <img src="7086129663063996408_cover.jpg" alt="" class="x-ebookmaker-cover" />
1357 </div>
1358 </body>
1359</html>"#;
1360 let href = extract_cover_image_href_from_xhtml(xhtml)
1361 .expect("img src should be discovered from cover xhtml");
1362 assert_eq!(href, "7086129663063996408_cover.jpg");
1363 }
1364
1365 #[test]
1366 fn test_extract_cover_image_href_from_xhtml_svg_image() {
1367 let xhtml = br#"<?xml version="1.0" encoding="utf-8"?>
1368<html xmlns="http://www.w3.org/1999/xhtml" xmlns:svg="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
1369 <body>
1370 <svg:svg viewBox="0 0 100 100">
1371 <svg:image xlink:href="../images/cover.png" width="100" height="100"/>
1372 </svg:svg>
1373 </body>
1374</html>"#;
1375 let href = extract_cover_image_href_from_xhtml(xhtml)
1376 .expect("svg image href should be discovered from cover xhtml");
1377 assert_eq!(href, "../images/cover.png");
1378 }
1379}