1extern crate alloc;
8
9use alloc::format;
10use alloc::string::{String, ToString};
11use alloc::vec::Vec;
12use core::str;
13use quick_xml::events::Event;
14use quick_xml::reader::Reader;
15use std::fs::File;
16use std::io::{Read, Seek, Write};
17use std::path::Path;
18
19use crate::error::{
20 EpubError, ErrorLimitContext, ErrorPhase, LimitKind, PhaseError, PhaseErrorContext, ZipError,
21};
22use crate::metadata::{extract_metadata, EpubMetadata};
23use crate::navigation::{parse_nav_xhtml, parse_ncx, NavPoint, Navigation};
24use crate::render_prep::{
25 parse_font_faces_from_css, parse_stylesheet_links, ChapterStylesheets, EmbeddedFontFace,
26 FontLimits, RenderPrep, RenderPrepOptions, StyleLimits, StyledChapter, StyledEventOrRun,
27 StylesheetSource,
28};
29use crate::spine::Spine;
30
31use crate::tokenizer::{tokenize_html, Token};
32use crate::zip::{CdEntry, StreamingZip, ZipLimits};
33
/// Strictness level used while opening an EPUB.
///
/// The selected mode is forwarded to open-time invariant validation and to
/// navigation parsing. `Lenient` is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[non_exhaustive]
pub enum ValidationMode {
    /// Default mode.
    #[default]
    Lenient,
    /// Strict mode (opt-in, e.g. through `EpubBookBuilder::strict`).
    Strict,
}
44
45#[derive(Clone, Copy, Debug, PartialEq, Eq)]
47pub struct EpubBookOptions {
48 pub zip_limits: Option<ZipLimits>,
52 pub validation_mode: ValidationMode,
54 pub max_nav_bytes: Option<usize>,
56}
57
58impl Default for EpubBookOptions {
59 fn default() -> Self {
60 Self {
61 zip_limits: None,
62 validation_mode: ValidationMode::Lenient,
63 max_nav_bytes: None,
64 }
65 }
66}
67
68#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
70pub struct OpenConfig {
71 pub options: EpubBookOptions,
73 pub lazy_navigation: bool,
75}
76
77impl From<EpubBookOptions> for OpenConfig {
78 fn from(options: EpubBookOptions) -> Self {
79 Self {
80 options,
81 lazy_navigation: false,
82 }
83 }
84}
85
86#[derive(Clone, Copy, Debug, PartialEq)]
88pub struct ChapterEventsOptions {
89 pub render: RenderPrepOptions,
91 pub max_items: usize,
93}
94
95impl Default for ChapterEventsOptions {
96 fn default() -> Self {
97 Self {
98 render: RenderPrepOptions::default(),
99 max_items: 131_072,
100 }
101 }
102}
103
/// Limits and filters applied when reading an image resource.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ImageReadOptions {
    /// Byte cap passed to the limited entry reader (default 2 MiB).
    pub max_bytes: usize,
    /// Whether SVG images are accepted (default `true`).
    pub allow_svg: bool,
    /// Whether images of unknown type are accepted (default `false`).
    pub allow_unknown_images: bool,
}

impl Default for ImageReadOptions {
    /// 2 MiB cap, SVG allowed, unknown image types rejected.
    fn default() -> Self {
        const TWO_MIB: usize = 2 << 20;
        Self {
            max_bytes: TWO_MIB,
            allow_svg: true,
            allow_unknown_images: false,
        }
    }
}
124
125#[derive(Clone, Copy, Debug, PartialEq, Eq)]
127pub struct CoverImageOptions {
128 pub prefer_manifest_cover: bool,
130 pub include_guide_refs: bool,
132 pub parse_cover_documents: bool,
134 pub max_cover_document_bytes: usize,
136 pub image: ImageReadOptions,
138}
139
140impl Default for CoverImageOptions {
141 fn default() -> Self {
142 Self {
143 prefer_manifest_cover: true,
144 include_guide_refs: true,
145 parse_cover_documents: true,
146 max_cover_document_bytes: 256 * 1024,
147 image: ImageReadOptions::default(),
148 }
149 }
150}
151
/// Records which discovery path produced a cover image reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CoverImageSource {
    /// The cover item reported by the package metadata.
    Manifest,
    /// A guide reference whose type is "cover".
    Guide,
    /// An image referenced from inside a cover document.
    CoverDocument,
}
162
163#[derive(Clone, Debug, PartialEq, Eq)]
165pub struct CoverImageRef {
166 pub zip_path: String,
168 pub href: String,
170 pub media_type: Option<String>,
172 pub source: CoverImageSource,
174}
175
176#[derive(Clone, Debug)]
180pub struct StreamingChapterOptions {
181 pub render_prep: RenderPrepOptions,
183 pub max_items: usize,
185 pub max_entry_bytes: usize,
187 pub chunk_limits: Option<crate::streaming::ChunkLimits>,
189 pub load_stylesheets: bool,
191}
192
193impl Default for StreamingChapterOptions {
194 fn default() -> Self {
195 Self {
196 render_prep: RenderPrepOptions::default(),
197 max_items: 131_072,
198 max_entry_bytes: 4 * 1024 * 1024, chunk_limits: None, load_stylesheets: false, }
202 }
203}
204
205impl StreamingChapterOptions {
206 pub fn embedded() -> Self {
208 Self {
209 render_prep: RenderPrepOptions::default(),
210 max_items: 10_000,
211 max_entry_bytes: 512 * 1024, chunk_limits: Some(crate::streaming::ChunkLimits::embedded()),
213 load_stylesheets: false,
214 }
215 }
216
217 pub fn with_chunk_limits(mut self, limits: crate::streaming::ChunkLimits) -> Self {
219 self.chunk_limits = Some(limits);
220 self
221 }
222
223 pub fn with_stylesheets(mut self, load: bool) -> Self {
225 self.load_stylesheets = load;
226 self
227 }
228}
229
/// Summary counters reported for a chapter streaming run.
#[derive(Debug, Clone)]
pub struct ChapterStreamResult {
    /// Number of items emitted.
    pub items_emitted: usize,
    /// Number of bytes read.
    pub bytes_read: usize,
    /// Completion flag.
    pub complete: bool,
}
240
241#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
243pub struct EpubBookBuilder {
244 options: EpubBookOptions,
245}
246
247impl EpubBookBuilder {
248 pub fn new() -> Self {
250 Self::default()
251 }
252
253 pub fn with_zip_limits(mut self, limits: ZipLimits) -> Self {
255 self.options.zip_limits = Some(limits);
256 self
257 }
258
259 pub fn strict(mut self) -> Self {
261 self.options.validation_mode = ValidationMode::Strict;
262 self
263 }
264
265 pub fn validation_mode(mut self, mode: ValidationMode) -> Self {
267 self.options.validation_mode = mode;
268 self
269 }
270
271 pub fn with_max_nav_bytes(mut self, max_nav_bytes: usize) -> Self {
273 self.options.max_nav_bytes = Some(max_nav_bytes);
274 self
275 }
276
277 pub fn open<P: AsRef<Path>>(self, path: P) -> Result<EpubBook<File>, EpubError> {
279 EpubBook::open_with_options(path, self.options)
280 }
281
282 pub fn from_reader<R: Read + Seek>(self, reader: R) -> Result<EpubBook<R>, EpubError> {
284 EpubBook::from_reader_with_options(reader, self.options)
285 }
286
287 pub fn parse_file<P: AsRef<Path>>(self, path: P) -> Result<EpubSummary, EpubError> {
289 parse_epub_file_with_options(path, self.options)
290 }
291
292 pub fn parse_reader<R: Read + Seek>(self, reader: R) -> Result<EpubSummary, EpubError> {
294 parse_epub_reader_with_options(reader, self.options)
295 }
296}
297
298#[derive(Clone, Debug, PartialEq, Eq)]
300pub struct EpubSummary {
301 metadata: EpubMetadata,
302 spine: Spine,
303 navigation: Option<Navigation>,
304}
305
306impl EpubSummary {
307 pub fn metadata(&self) -> &EpubMetadata {
309 &self.metadata
310 }
311
312 pub fn spine(&self) -> &Spine {
314 &self.spine
315 }
316
317 pub fn navigation(&self) -> Option<&Navigation> {
319 self.navigation.as_ref()
320 }
321}
322
323pub fn parse_epub_reader<R: Read + Seek>(reader: R) -> Result<EpubSummary, EpubError> {
325 parse_epub_reader_with_options(reader, EpubBookOptions::default())
326}
327
328pub fn parse_epub_reader_with_options<R: Read + Seek>(
330 reader: R,
331 options: EpubBookOptions,
332) -> Result<EpubSummary, EpubError> {
333 let mut zip =
334 StreamingZip::new_with_limits(reader, options.zip_limits).map_err(EpubError::Zip)?;
335 load_summary_from_zip(&mut zip, options)
336}
337
338pub fn parse_epub_file<P: AsRef<Path>>(path: P) -> Result<EpubSummary, EpubError> {
340 parse_epub_file_with_options(path, EpubBookOptions::default())
341}
342
343pub fn parse_epub_file_with_options<P: AsRef<Path>>(
345 path: P,
346 options: EpubBookOptions,
347) -> Result<EpubSummary, EpubError> {
348 let file = File::open(path).map_err(|e| EpubError::Io(e.to_string()))?;
349 parse_epub_reader_with_options(file, options)
350}
351
352pub struct EpubBook<R: Read + Seek> {
354 zip: StreamingZip<R>,
355 opf_path: String,
356 metadata: EpubMetadata,
357 spine: Spine,
358 validation_mode: ValidationMode,
359 max_nav_bytes: Option<usize>,
360 navigation_loaded: bool,
361 navigation: Option<Navigation>,
362 embedded_fonts_cache: Option<Vec<EmbeddedFontFace>>,
363}
364
/// Lightweight description of one spine chapter.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChapterRef {
    /// Position of the chapter in the spine.
    pub index: usize,
    /// Spine `idref` of the chapter.
    pub idref: String,
    /// Href of the matching manifest item.
    pub href: String,
    /// Media type of the matching manifest item.
    pub media_type: String,
}
377
/// A reading position inside a book.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ReadingPosition {
    /// Index of the chapter.
    pub chapter_index: usize,
    /// Href of the chapter, when known.
    pub chapter_href: Option<String>,
    /// Anchor (fragment) inside the chapter, when known.
    pub anchor: Option<String>,
    /// Fallback offset inside the chapter.
    pub fallback_offset: usize,
}
390
391#[derive(Clone, Debug, PartialEq, Eq)]
393pub enum Locator {
394 Chapter(usize),
396 Href(String),
398 Fragment(String),
400 TocId(String),
402 Position(ReadingPosition),
404}
405
406#[derive(Clone, Debug, PartialEq, Eq)]
408pub struct ResolvedLocation {
409 pub chapter: ChapterRef,
411 pub fragment: Option<String>,
413 pub position: ReadingPosition,
415}
416
417#[derive(Clone, Debug, PartialEq, Eq)]
419pub struct ReadingSession {
420 chapters: Vec<ChapterRef>,
421 navigation: Option<Navigation>,
422 current: ReadingPosition,
423}
424
425impl ReadingSession {
426 pub fn new(chapters: Vec<ChapterRef>, navigation: Option<Navigation>) -> Self {
428 let first_href = chapters.first().map(|c| c.href.clone());
429 Self {
430 chapters,
431 navigation,
432 current: ReadingPosition {
433 chapter_index: 0,
434 chapter_href: first_href,
435 anchor: None,
436 fallback_offset: 0,
437 },
438 }
439 }
440
441 pub fn current_position(&self) -> ReadingPosition {
443 self.current.clone()
444 }
445
446 pub fn seek_position(&mut self, pos: &ReadingPosition) -> Result<(), EpubError> {
448 if pos.chapter_index >= self.chapters.len() {
449 return Err(EpubError::ChapterOutOfBounds {
450 index: pos.chapter_index,
451 chapter_count: self.chapters.len(),
452 });
453 }
454 self.current = pos.clone();
455 if self.current.chapter_href.is_none() {
456 self.current.chapter_href = Some(self.chapters[pos.chapter_index].href.clone());
457 }
458 Ok(())
459 }
460
461 pub fn chapter_progress(&self) -> f32 {
463 if self.chapters.is_empty() {
464 return 0.0;
465 }
466 if self.current.fallback_offset == 0 {
467 0.0
468 } else {
469 1.0
470 }
471 }
472
473 pub fn book_progress(&self) -> f32 {
475 if self.chapters.is_empty() {
476 return 0.0;
477 }
478 let chapter_ratio = self.chapter_progress();
479 ((self.current.chapter_index as f32) + chapter_ratio) / (self.chapters.len() as f32)
480 }
481
482 pub fn resolve_locator(&mut self, loc: Locator) -> Result<ResolvedLocation, EpubError> {
484 match loc {
485 Locator::Chapter(index) => {
486 let chapter =
487 self.chapters
488 .get(index)
489 .cloned()
490 .ok_or(EpubError::ChapterOutOfBounds {
491 index,
492 chapter_count: self.chapters.len(),
493 })?;
494 self.current.chapter_index = index;
495 self.current.chapter_href = Some(chapter.href.clone());
496 self.current.anchor = None;
497 Ok(ResolvedLocation {
498 chapter,
499 fragment: None,
500 position: self.current.clone(),
501 })
502 }
503 Locator::Href(href) => {
504 let (base, fragment) = split_href_fragment(&href);
505 let (index, chapter) = self
506 .chapters
507 .iter()
508 .enumerate()
509 .find(|(_, chapter)| chapter.href == base)
510 .map(|(idx, chapter)| (idx, chapter.clone()))
511 .ok_or_else(|| {
512 EpubError::InvalidEpub(format!("unknown chapter href: {}", href))
513 })?;
514 self.current.chapter_index = index;
515 self.current.chapter_href = Some(chapter.href.clone());
516 self.current.anchor = fragment.clone();
517 Ok(ResolvedLocation {
518 chapter,
519 fragment,
520 position: self.current.clone(),
521 })
522 }
523 Locator::Fragment(fragment) => {
524 let idx = self
525 .current
526 .chapter_index
527 .min(self.chapters.len().saturating_sub(1));
528 let chapter =
529 self.chapters
530 .get(idx)
531 .cloned()
532 .ok_or(EpubError::ChapterOutOfBounds {
533 index: idx,
534 chapter_count: self.chapters.len(),
535 })?;
536 self.current.chapter_index = idx;
537 self.current.chapter_href = Some(chapter.href.clone());
538 self.current.anchor = Some(fragment.clone());
539 Ok(ResolvedLocation {
540 chapter,
541 fragment: Some(fragment),
542 position: self.current.clone(),
543 })
544 }
545 Locator::TocId(id) => {
546 let nav = self.navigation.as_ref().ok_or_else(|| {
547 EpubError::Navigation("no navigation document available".to_string())
548 })?;
549 let href = find_toc_href(nav, &id).ok_or_else(|| {
550 EpubError::Navigation(format!("toc id/label not found: {}", id))
551 })?;
552 self.resolve_locator(Locator::Href(href))
553 }
554 Locator::Position(pos) => {
555 self.seek_position(&pos)?;
556 self.resolve_locator(Locator::Chapter(pos.chapter_index))
557 }
558 }
559 }
560}
561
/// Cursor state kept while paginating a book chapter by chapter.
#[derive(Debug, Clone)]
pub struct PaginationSession {
    /// Index of the chapter being paginated.
    chapter_index: usize,
    /// Byte offset accumulated inside the current chapter.
    byte_offset: usize,
    /// Event count accumulated inside the current chapter.
    event_index: usize,
    /// Stack of tag names pushed and popped by the caller.
    element_stack: Vec<String>,
    /// Page counter.
    page_number: usize,
    /// Whether the current chapter has been marked complete.
    chapter_complete: bool,
}
581
582impl PaginationSession {
583 pub fn new() -> Self {
585 Self {
586 chapter_index: 0,
587 byte_offset: 0,
588 event_index: 0,
589 element_stack: Vec::with_capacity(32),
590 page_number: 0,
591 chapter_complete: false,
592 }
593 }
594
595 pub fn at_position(chapter: usize, byte_offset: usize, event_index: usize) -> Self {
597 Self {
598 chapter_index: chapter,
599 byte_offset,
600 event_index,
601 element_stack: Vec::with_capacity(32),
602 page_number: 0,
603 chapter_complete: false,
604 }
605 }
606
607 pub fn chapter_index(&self) -> usize {
609 self.chapter_index
610 }
611
612 pub fn byte_offset(&self) -> usize {
614 self.byte_offset
615 }
616
617 pub fn event_index(&self) -> usize {
619 self.event_index
620 }
621
622 pub fn page_number(&self) -> usize {
624 self.page_number
625 }
626
627 pub fn is_chapter_complete(&self) -> bool {
629 self.chapter_complete
630 }
631
632 pub fn next_page(&mut self) {
634 self.page_number += 1;
635 }
636
637 pub fn advance(&mut self, bytes: usize, events: usize) {
639 self.byte_offset += bytes;
640 self.event_index += events;
641 }
642
643 pub fn push_element(&mut self, tag: &str) {
645 self.element_stack.push(tag.to_string());
646 }
647
648 pub fn pop_element(&mut self) -> Option<String> {
650 self.element_stack.pop()
651 }
652
653 pub fn next_chapter(&mut self) {
655 self.chapter_index += 1;
656 self.byte_offset = 0;
657 self.event_index = 0;
658 self.element_stack.clear();
659 self.chapter_complete = false;
660 }
661
662 pub fn mark_chapter_complete(&mut self) {
664 self.chapter_complete = true;
665 }
666
667 pub fn reset_chapter(&mut self, chapter_index: usize) {
669 self.chapter_index = chapter_index;
670 self.byte_offset = 0;
671 self.event_index = 0;
672 self.element_stack.clear();
673 self.chapter_complete = false;
674 }
675
676 pub fn to_position(&self) -> ReadingPosition {
678 ReadingPosition {
679 chapter_index: self.chapter_index,
680 chapter_href: None,
681 anchor: None,
682 fallback_offset: self.byte_offset,
683 }
684 }
685}
686
687impl Default for PaginationSession {
688 fn default() -> Self {
689 Self::new()
690 }
691}
692
/// Splits `href` at its first `#` into `(base, Some(fragment))`.
/// Without a `#` the whole input is the base and the fragment is `None`.
fn split_href_fragment(href: &str) -> (String, Option<String>) {
    match href.find('#') {
        Some(hash) => {
            let base = String::from(&href[..hash]);
            let fragment = String::from(&href[hash + 1..]);
            (base, Some(fragment))
        }
        None => (String::from(href), None),
    }
}
699
700fn find_toc_href(nav: &Navigation, id: &str) -> Option<String> {
701 fn visit(points: &[NavPoint], id: &str) -> Option<String> {
702 for point in points {
703 let (_, fragment) = split_href_fragment(&point.href);
704 if point.label == id || fragment.as_deref() == Some(id) {
705 return Some(point.href.clone());
706 }
707 if let Some(hit) = visit(&point.children, id) {
708 return Some(hit);
709 }
710 }
711 None
712 }
713 visit(&nav.toc, id)
714}
715
716impl EpubBook<File> {
717 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
719 Self::open_with_options(path, EpubBookOptions::default())
720 }
721
722 pub fn open_with_options<P: AsRef<Path>>(
724 path: P,
725 options: EpubBookOptions,
726 ) -> Result<Self, EpubError> {
727 let file = File::open(path).map_err(|e| EpubError::Io(e.to_string()))?;
728 Self::from_reader_with_options(file, options)
729 }
730
731 pub fn open_with_config<P: AsRef<Path>>(
733 path: P,
734 config: OpenConfig,
735 ) -> Result<Self, EpubError> {
736 let file = File::open(path).map_err(|e| EpubError::Io(e.to_string()))?;
737 Self::from_reader_with_config(file, config)
738 }
739
740 #[cfg(feature = "std")]
771 pub fn open_with_temp_storage<EP: AsRef<Path>, TP: AsRef<Path>>(
772 epub_path: EP,
773 temp_dir: TP,
774 config: OpenConfig,
775 ) -> Result<Self, EpubError> {
776 use crate::metadata::{parse_container_xml_file, parse_opf_file};
777
778 let options = config.options;
779 let mut zip = StreamingZip::new_with_limits(
780 File::open(&epub_path).map_err(|e| EpubError::Io(e.to_string()))?,
781 options.zip_limits,
782 )
783 .map_err(EpubError::Zip)?;
784
785 zip.validate_mimetype().map_err(EpubError::Zip)?;
786
787 let temp_dir = temp_dir.as_ref();
789 let container_temp = temp_dir.join(".epub_stream_container.xml");
790 let opf_temp = temp_dir.join(".epub_stream_opf.xml");
791
792 let mut container_file = File::create(&container_temp)
794 .map_err(|e| EpubError::Io(format!("Failed to create temp file: {}", e)))?;
795 read_entry_into(&mut zip, "META-INF/container.xml", &mut container_file)?;
796 drop(container_file);
797
798 let opf_path = parse_container_xml_file(&container_temp)
800 .map_err(|e| EpubError::Parse(format!("Failed to parse container.xml: {}", e)))?;
801
802 let _ = std::fs::remove_file(&container_temp);
804
805 let opf_entry = zip
807 .get_entry(&opf_path)
808 .ok_or(EpubError::Zip(ZipError::FileNotFound))?;
809
810 let opf_entry_data = CdEntry {
812 method: opf_entry.method,
813 compressed_size: opf_entry.compressed_size,
814 uncompressed_size: opf_entry.uncompressed_size,
815 local_header_offset: opf_entry.local_header_offset,
816 crc32: opf_entry.crc32,
817 filename: String::with_capacity(0),
818 };
819
820 let mut opf_file = File::create(&opf_temp)
822 .map_err(|e| EpubError::Io(format!("Failed to create temp file: {}", e)))?;
823 zip.read_file_to_writer(&opf_entry_data, &mut opf_file)
824 .map_err(EpubError::Zip)?;
825 drop(opf_file);
826
827 let mut metadata = parse_opf_file(&opf_temp)
829 .map_err(|e| EpubError::Parse(format!("Failed to parse OPF: {}", e)))?;
830
831 metadata.opf_path = Some(opf_path.clone());
833
834 let spine = crate::spine::parse_spine_file(&opf_temp)?;
836
837 let _ = std::fs::remove_file(&opf_temp);
839
840 validate_open_invariants(&metadata, &spine, options.validation_mode)?;
841
842 let (navigation, navigation_loaded) = if config.lazy_navigation {
844 (None, false)
845 } else {
846 (
847 parse_navigation(
848 &mut zip,
849 &metadata,
850 &spine,
851 &opf_path,
852 options.validation_mode,
853 options.max_nav_bytes,
854 )?,
855 true,
856 )
857 };
858
859 Ok(Self {
860 zip,
861 opf_path,
862 metadata,
863 spine,
864 validation_mode: options.validation_mode,
865 max_nav_bytes: options.max_nav_bytes,
866 navigation_loaded,
867 navigation,
868 embedded_fonts_cache: None,
869 })
870 }
871}
872
873impl<R: Read + Seek> EpubBook<R> {
874 pub fn from_reader(reader: R) -> Result<Self, EpubError> {
881 Self::from_reader_with_options(reader, EpubBookOptions::default())
882 }
883
884 pub fn from_reader_with_options(
891 reader: R,
892 options: EpubBookOptions,
893 ) -> Result<Self, EpubError> {
894 Self::from_reader_with_config(reader, OpenConfig::from(options))
895 }
896
897 pub fn from_reader_with_config(reader: R, config: OpenConfig) -> Result<Self, EpubError> {
904 let options = config.options;
905 let mut zip =
906 StreamingZip::new_with_limits(reader, options.zip_limits).map_err(EpubError::Zip)?;
907 zip.validate_mimetype().map_err(EpubError::Zip)?;
908
909 let container = read_entry(&mut zip, "META-INF/container.xml")?;
910 let opf_path = crate::metadata::parse_container_xml(&container)?;
911 let opf = read_entry(&mut zip, &opf_path)?;
912 let metadata = extract_metadata(&container, &opf)?;
913 let spine = crate::spine::parse_spine(&opf)?;
914 validate_open_invariants(&metadata, &spine, options.validation_mode)?;
915 let (navigation, navigation_loaded) = if config.lazy_navigation {
916 (None, false)
917 } else {
918 (
919 parse_navigation(
920 &mut zip,
921 &metadata,
922 &spine,
923 &opf_path,
924 options.validation_mode,
925 options.max_nav_bytes,
926 )?,
927 true,
928 )
929 };
930
931 Ok(Self {
932 zip,
933 opf_path,
934 metadata,
935 spine,
936 validation_mode: options.validation_mode,
937 max_nav_bytes: options.max_nav_bytes,
938 navigation_loaded,
939 navigation,
940 embedded_fonts_cache: None,
941 })
942 }
943
944 pub fn metadata(&self) -> &EpubMetadata {
946 &self.metadata
947 }
948
949 pub fn title(&self) -> &str {
951 self.metadata.title.as_str()
952 }
953
954 pub fn author(&self) -> &str {
956 self.metadata.author.as_str()
957 }
958
959 pub fn language(&self) -> &str {
961 self.metadata.language.as_str()
962 }
963
964 pub fn spine(&self) -> &Spine {
966 &self.spine
967 }
968
969 pub fn navigation(&self) -> Option<&Navigation> {
971 self.navigation.as_ref()
972 }
973
974 pub fn ensure_navigation(&mut self) -> Result<Option<&Navigation>, EpubError> {
976 if !self.navigation_loaded {
977 self.navigation = parse_navigation(
978 &mut self.zip,
979 &self.metadata,
980 &self.spine,
981 &self.opf_path,
982 self.validation_mode,
983 self.max_nav_bytes,
984 )?;
985 self.navigation_loaded = true;
986 }
987 Ok(self.navigation.as_ref())
988 }
989
990 pub fn toc(&self) -> Option<&[NavPoint]> {
992 self.navigation.as_ref().map(|n| n.toc.as_slice())
993 }
994
995 pub fn chapter_count(&self) -> usize {
997 self.spine.len()
998 }
999
1000 pub fn chapter_uncompressed_size(&mut self, index: usize) -> Result<usize, EpubError> {
1005 let chapter = self.chapter(index)?;
1006 let zip_path = resolve_opf_relative_path(&self.opf_path, &chapter.href);
1007 let entry = self
1008 .zip
1009 .get_entry(&zip_path)
1010 .ok_or(EpubError::Zip(ZipError::FileNotFound))?;
1011 usize::try_from(entry.uncompressed_size).map_err(|_| EpubError::Zip(ZipError::FileTooLarge))
1012 }
1013
1014 pub fn reading_session(&self) -> ReadingSession {
1016 ReadingSession::new(self.chapters().collect(), self.navigation.clone())
1017 }
1018
1019 pub fn chapters(&self) -> impl Iterator<Item = ChapterRef> + '_ {
1021 self.spine
1022 .items()
1023 .iter()
1024 .enumerate()
1025 .filter_map(|(index, spine_item)| {
1026 self.metadata
1027 .get_item(&spine_item.idref)
1028 .map(|manifest_item| ChapterRef {
1029 index,
1030 idref: spine_item.idref.clone(),
1031 href: manifest_item.href.clone(),
1032 media_type: manifest_item.media_type.clone(),
1033 })
1034 })
1035 }
1036
1037 pub fn chapter(&self, index: usize) -> Result<ChapterRef, EpubError> {
1039 let spine_item = self
1040 .spine
1041 .get_item(index)
1042 .ok_or(EpubError::ChapterOutOfBounds {
1043 index,
1044 chapter_count: self.spine.len(),
1045 })?;
1046
1047 let manifest_item = self.metadata.get_item(&spine_item.idref).ok_or_else(|| {
1048 EpubError::ManifestItemMissing {
1049 idref: spine_item.idref.clone(),
1050 }
1051 })?;
1052
1053 Ok(ChapterRef {
1054 index,
1055 idref: spine_item.idref.clone(),
1056 href: manifest_item.href.clone(),
1057 media_type: manifest_item.media_type.clone(),
1058 })
1059 }
1060
1061 pub fn chapter_by_id(&self, idref: &str) -> Result<ChapterRef, EpubError> {
1063 let index = self
1064 .spine
1065 .items()
1066 .iter()
1067 .position(|item| item.idref == idref)
1068 .ok_or_else(|| EpubError::ManifestItemMissing {
1069 idref: idref.to_string(),
1070 })?;
1071 self.chapter(index)
1072 }
1073
1074 pub fn read_resource(&mut self, href: &str) -> Result<Vec<u8>, EpubError> {
1086 let mut out = Vec::with_capacity(0);
1087 self.read_resource_into(href, &mut out)?;
1088 Ok(out)
1089 }
1090
1091 pub fn read_resource_into<W: Write>(
1100 &mut self,
1101 href: &str,
1102 writer: &mut W,
1103 ) -> Result<usize, EpubError> {
1104 self.read_resource_into_with_hard_cap(href, writer, usize::MAX)
1105 }
1106
1107 pub fn read_resource_into_with_limit<W: Write>(
1111 &mut self,
1112 href: &str,
1113 writer: &mut W,
1114 max_bytes: usize,
1115 ) -> Result<usize, EpubError> {
1116 self.read_resource_into_with_hard_cap(href, writer, max_bytes)
1117 }
1118
1119 pub fn read_resource_into_with_hard_cap<W: Write>(
1123 &mut self,
1124 href: &str,
1125 writer: &mut W,
1126 hard_cap_bytes: usize,
1127 ) -> Result<usize, EpubError> {
1128 let zip_path = resolve_opf_relative_path(&self.opf_path, href);
1129 read_entry_into_with_limit(&mut self.zip, &zip_path, writer, hard_cap_bytes)
1130 }
1131
1132 pub fn read_spine_item_bytes(&mut self, index: usize) -> Result<Vec<u8>, EpubError> {
1134 let href = self.chapter(index)?.href;
1135
1136 self.read_resource(&href)
1137 }
1138
1139 pub fn read_image_resource_into_with_options(
1145 &mut self,
1146 href: &str,
1147 out: &mut Vec<u8>,
1148 options: ImageReadOptions,
1149 ) -> Result<usize, EpubError> {
1150 let zip_path = resolve_opf_relative_path(&self.opf_path, href);
1151 let media_type = self
1152 .manifest_item_by_zip_path(&zip_path)
1153 .map(|item| item.media_type.as_str());
1154 if !is_supported_image_resource(media_type, &zip_path, options) {
1155 return Err(EpubError::Parse(format!(
1156 "resource is not an allowed image: {}",
1157 href
1158 )));
1159 }
1160 read_entry_into_with_limit(&mut self.zip, &zip_path, out, options.max_bytes)
1161 }
1162
1163 pub fn cover_image_ref(&mut self) -> Result<Option<CoverImageRef>, EpubError> {
1165 self.cover_image_ref_with_options(CoverImageOptions::default())
1166 }
1167
1168 pub fn cover_image_ref_with_options(
1170 &mut self,
1171 options: CoverImageOptions,
1172 ) -> Result<Option<CoverImageRef>, EpubError> {
1173 #[derive(Clone)]
1174 struct Candidate {
1175 href: String,
1176 media_type: Option<String>,
1177 source: CoverImageSource,
1178 }
1179
1180 let mut candidates: Vec<Candidate> = Vec::with_capacity(4);
1181 if let Some(item) = self.metadata.get_cover_item() {
1182 candidates.push(Candidate {
1183 href: item.href.clone(),
1184 media_type: Some(item.media_type.clone()),
1185 source: CoverImageSource::Manifest,
1186 });
1187 }
1188 if options.include_guide_refs {
1189 for guide_ref in &self.metadata.guide {
1190 if !guide_ref.guide_type.eq_ignore_ascii_case("cover") {
1191 continue;
1192 }
1193 if candidates
1194 .iter()
1195 .any(|existing| existing.href == guide_ref.href)
1196 {
1197 continue;
1198 }
1199 candidates.push(Candidate {
1200 href: guide_ref.href.clone(),
1201 media_type: None,
1202 source: CoverImageSource::Guide,
1203 });
1204 }
1205 }
1206 if !options.prefer_manifest_cover && !candidates.is_empty() {
1207 candidates.rotate_left(1);
1208 }
1209
1210 let mut doc_buf = Vec::with_capacity(0);
1211 for candidate in candidates {
1212 let zip_path = resolve_opf_relative_path(&self.opf_path, &candidate.href);
1213 if is_supported_image_resource(
1214 candidate.media_type.as_deref(),
1215 &zip_path,
1216 options.image,
1217 ) {
1218 let media_type =
1219 normalized_image_media_type(candidate.media_type.as_deref(), &zip_path);
1220 return Ok(Some(CoverImageRef {
1221 zip_path,
1222 href: candidate.href,
1223 media_type,
1224 source: candidate.source,
1225 }));
1226 }
1227 if !options.parse_cover_documents
1228 || !is_cover_document(candidate.media_type.as_deref(), &zip_path)
1229 {
1230 continue;
1231 }
1232 doc_buf.clear();
1233 if read_entry_into_with_limit(
1234 &mut self.zip,
1235 &zip_path,
1236 &mut doc_buf,
1237 options.max_cover_document_bytes,
1238 )
1239 .is_err()
1240 {
1241 continue;
1242 }
1243 let Some(nested_href) =
1244 crate::metadata::extract_cover_image_href_from_xhtml(doc_buf.as_slice())
1245 else {
1246 continue;
1247 };
1248 let nested_zip_path = resolve_opf_relative_path(&zip_path, &nested_href);
1249 let nested_manifest_item = self.manifest_item_by_zip_path(&nested_zip_path);
1250 let nested_media = nested_manifest_item.map(|item| item.media_type.as_str());
1251 if !is_supported_image_resource(nested_media, &nested_zip_path, options.image) {
1252 continue;
1253 }
1254 let media_type = normalized_image_media_type(nested_media, &nested_zip_path);
1255 let href = nested_manifest_item
1256 .map(|item| item.href.clone())
1257 .unwrap_or_else(|| nested_zip_path.clone());
1258 return Ok(Some(CoverImageRef {
1259 zip_path: nested_zip_path,
1260 href,
1261 media_type,
1262 source: CoverImageSource::CoverDocument,
1263 }));
1264 }
1265
1266 Ok(None)
1267 }
1268
1269 pub fn read_cover_image_into_with_options(
1273 &mut self,
1274 out: &mut Vec<u8>,
1275 options: CoverImageOptions,
1276 ) -> Result<Option<CoverImageRef>, EpubError> {
1277 let Some(cover) = self.cover_image_ref_with_options(options)? else {
1278 return Ok(None);
1279 };
1280 out.clear();
1281 read_entry_into_with_limit(&mut self.zip, &cover.zip_path, out, options.image.max_bytes)?;
1282 Ok(Some(cover))
1283 }
1284
1285 pub fn read_cover_image_into(
1287 &mut self,
1288 out: &mut Vec<u8>,
1289 ) -> Result<Option<CoverImageRef>, EpubError> {
1290 self.read_cover_image_into_with_options(out, CoverImageOptions::default())
1291 }
1292
1293 fn manifest_item_by_zip_path(&self, zip_path: &str) -> Option<&crate::metadata::ManifestItem> {
1294 self.metadata.manifest.iter().find(|item| {
1295 let item_zip_path = resolve_opf_relative_path(&self.opf_path, &item.href);
1296 item_zip_path == zip_path
1297 })
1298 }
1299
1300 pub fn chapter_html(&mut self, index: usize) -> Result<String, EpubError> {
1310 let mut out = String::with_capacity(0);
1311 self.chapter_html_into(index, &mut out)?;
1312 Ok(out)
1313 }
1314
1315 pub fn chapter_html_into(&mut self, index: usize, out: &mut String) -> Result<(), EpubError> {
1322 self.chapter_html_into_with_limit(index, usize::MAX, out)
1323 }
1324
1325 pub fn chapter_html_into_with_limit(
1327 &mut self,
1328 index: usize,
1329 max_bytes: usize,
1330 out: &mut String,
1331 ) -> Result<(), EpubError> {
1332 out.clear();
1333 let chapter = self.chapter(index)?;
1334 let mut bytes = Vec::with_capacity(0);
1335 self.read_resource_into_with_hard_cap(&chapter.href, &mut bytes, max_bytes)?;
1336 let mut html = String::from_utf8(bytes)
1337 .map_err(|_| EpubError::ChapterNotUtf8 { href: chapter.href })?;
1338 core::mem::swap(out, &mut html);
1339 Ok(())
1340 }
1341
1342 pub fn chapter_stylesheets(&mut self, index: usize) -> Result<ChapterStylesheets, EpubError> {
1344 self.chapter_stylesheets_with_options(index, StyleLimits::default())
1345 }
1346
1347 pub fn chapter_stylesheets_with_options(
1349 &mut self,
1350 index: usize,
1351 limits: StyleLimits,
1352 ) -> Result<ChapterStylesheets, EpubError> {
1353 let chapter = self.chapter(index)?;
1354 let html = self.chapter_html(index)?;
1355 let links = parse_stylesheet_links(&chapter.href, &html);
1356 let mut sources = Vec::with_capacity(0);
1357
1358 for href in links {
1359 let bytes = self.read_resource(&href)?;
1360 if bytes.len() > limits.max_css_bytes {
1361 return Err(EpubError::Parse(format!(
1362 "Stylesheet exceeds max_css_bytes ({} > {}) at '{}'",
1363 bytes.len(),
1364 limits.max_css_bytes,
1365 href
1366 )));
1367 }
1368 let css = String::from_utf8(bytes)
1369 .map_err(|_| EpubError::Parse(format!("Stylesheet is not UTF-8: {}", href)))?;
1370 sources.push(StylesheetSource { href, css });
1371 }
1372
1373 Ok(ChapterStylesheets { sources })
1374 }
1375
1376 pub fn styles_for_chapter(
1378 &mut self,
1379 index: usize,
1380 limits: StyleLimits,
1381 ) -> Result<ChapterStylesheets, EpubError> {
1382 self.chapter_stylesheets_with_options(index, limits)
1383 }
1384
1385 pub fn chapter_stylesheets_with_scratch(
1395 &mut self,
1396 index: usize,
1397 limits: StyleLimits,
1398 scratch_buf: &mut Vec<u8>,
1399 ) -> Result<ChapterStylesheets, EpubError> {
1400 let chapter = self.chapter(index)?;
1401 let html = self.chapter_html(index)?;
1402 let links = parse_stylesheet_links(&chapter.href, &html);
1403 let mut sources = Vec::with_capacity(links.len());
1404
1405 for href in links {
1406 scratch_buf.clear();
1407 self.read_resource_into(&href, scratch_buf)
1408 .map_err(|_| EpubError::Zip(ZipError::FileNotFound))?;
1409
1410 if scratch_buf.len() > limits.max_css_bytes {
1411 return Err(EpubError::LimitExceeded {
1412 kind: LimitKind::CssSize,
1413 actual: scratch_buf.len(),
1414 limit: limits.max_css_bytes,
1415 path: Some(href.clone()),
1416 });
1417 }
1418
1419 let css = String::from_utf8(scratch_buf.clone())
1420 .map_err(|_| EpubError::ChapterNotUtf8 { href: href.clone() })?;
1421
1422 sources.push(StylesheetSource { href, css });
1423 }
1424
1425 Ok(ChapterStylesheets { sources })
1426 }
1427
1428 pub fn embedded_fonts(&mut self) -> Result<Vec<EmbeddedFontFace>, EpubError> {
1430 self.embedded_fonts_with_limits(FontLimits::default())
1431 }
1432
1433 pub fn embedded_fonts_with_options(
1435 &mut self,
1436 limits: FontLimits,
1437 ) -> Result<Vec<EmbeddedFontFace>, EpubError> {
1438 self.embedded_fonts_with_limits(limits)
1439 }
1440
1441 pub fn embedded_fonts_with_limits(
1445 &mut self,
1446 limits: FontLimits,
1447 ) -> Result<Vec<EmbeddedFontFace>, EpubError> {
1448 let faces = self.ensure_embedded_fonts_loaded()?;
1449 if faces.len() > limits.max_faces {
1450 return Err(EpubError::LimitExceeded {
1451 kind: LimitKind::FontLimit,
1452 actual: faces.len(),
1453 limit: limits.max_faces,
1454 path: None,
1455 });
1456 }
1457 Ok(faces.clone())
1458 }
1459
1460 pub fn embedded_fonts_with_scratch(
1471 &mut self,
1472 limits: FontLimits,
1473 scratch_buf: &mut Vec<u8>,
1474 ) -> Result<Vec<EmbeddedFontFace>, EpubError> {
1475 let css_hrefs: Vec<String> = self
1476 .metadata
1477 .manifest
1478 .iter()
1479 .filter(|item| item.media_type == "text/css")
1480 .map(|item| item.href.clone())
1481 .collect();
1482
1483 let mut out = Vec::with_capacity(limits.max_faces.min(16));
1484
1485 for href in css_hrefs {
1486 if out.len() >= limits.max_faces {
1487 break;
1488 }
1489
1490 scratch_buf.clear();
1491 match self.read_resource_into(&href, scratch_buf) {
1492 Ok(_) => {}
1493 Err(_) => continue, }
1495
1496 if scratch_buf.len() > limits.max_bytes_per_font {
1497 continue; }
1499
1500 let css = match String::from_utf8(scratch_buf.clone()) {
1501 Ok(s) => s,
1502 Err(_) => continue, };
1504
1505 let faces = parse_font_faces_from_css(&href, &css);
1506 for face in faces {
1507 if out.len() >= limits.max_faces {
1508 break;
1509 }
1510 out.push(face);
1511 }
1512 }
1513
1514 Ok(out)
1515 }
1516
1517 pub fn chapter_styled_runs(&mut self, index: usize) -> Result<StyledChapter, EpubError> {
1525 self.chapter_styled_runs_with_options(index, RenderPrepOptions::default())
1526 }
1527
1528 pub fn chapter_styled_runs_with_options(
1535 &mut self,
1536 index: usize,
1537 options: RenderPrepOptions,
1538 ) -> Result<StyledChapter, EpubError> {
1539 let mut prep = RenderPrep::new(options).with_serif_default();
1540 let prepared = prep.prepare_chapter(self, index).map_err(EpubError::from)?;
1541 let mut items = Vec::with_capacity(0);
1542 for item in prepared.iter() {
1543 items.push(item.clone());
1544 }
1545 Ok(StyledChapter::from_items(items))
1546 }
1547
1548 pub fn chapter_events<F>(
1556 &mut self,
1557 index: usize,
1558 opts: ChapterEventsOptions,
1559 mut on_item: F,
1560 ) -> Result<usize, EpubError>
1561 where
1562 F: FnMut(StyledEventOrRun) -> Result<(), EpubError>,
1563 {
1564 let mut prep = RenderPrep::new(opts.render).with_serif_default();
1565 let mut emitted = 0usize;
1566 let mut callback_error: Option<EpubError> = None;
1567 let mut hit_cap = false;
1568
1569 prep.prepare_chapter_with(self, index, |item| {
1570 if callback_error.is_some() || hit_cap {
1571 return;
1572 }
1573 if emitted >= opts.max_items {
1574 hit_cap = true;
1575 return;
1576 }
1577 if let Err(err) = on_item(item) {
1578 callback_error = Some(err);
1579 return;
1580 }
1581 emitted += 1;
1582 })
1583 .map_err(EpubError::from)?;
1584
1585 if let Some(err) = callback_error {
1586 return Err(err);
1587 }
1588 if hit_cap {
1589 return Err(EpubError::Parse(format!(
1592 "Chapter event count exceeded max_items ({})",
1593 opts.max_items
1594 )));
1595 }
1596 Ok(emitted)
1597 }
1598
1599 #[inline(never)]
1615 pub fn chapter_events_with_scratch<F>(
1616 &mut self,
1617 index: usize,
1618 opts: ChapterEventsOptions,
1619 chapter_buf: &mut Vec<u8>,
1620 scratch: &mut crate::streaming::ScratchBuffers,
1621 mut on_item: F,
1622 ) -> Result<ChapterStreamResult, EpubError>
1623 where
1624 F: FnMut(StyledEventOrRun) -> Result<(), EpubError>,
1625 {
1626 use crate::zip::CdEntry;
1627
1628 chapter_buf.clear();
1630 scratch.clear();
1631
1632 let chapter = self.chapter(index)?;
1633 let href = chapter.href;
1634 let zip_path = resolve_opf_relative_path(&self.opf_path, &href);
1635
1636 let entry = self
1638 .zip
1639 .get_entry(&zip_path)
1640 .ok_or(EpubError::Zip(ZipError::FileNotFound))?
1641 .clone();
1642
1643 let uncompressed = usize::try_from(entry.uncompressed_size)
1645 .map_err(|_| EpubError::Zip(ZipError::FileTooLarge))?;
1646
1647 if let Some(limits) = self.zip.limits() {
1649 if uncompressed > limits.max_file_read_size {
1650 return Err(EpubError::LimitExceeded {
1651 kind: LimitKind::FileSize,
1652 actual: uncompressed,
1653 limit: limits.max_file_read_size,
1654 path: Some(zip_path),
1655 });
1656 }
1657 }
1658
1659 if uncompressed > opts.render.memory.max_entry_bytes {
1661 return Err(EpubError::LimitExceeded {
1662 kind: LimitKind::MemoryBudget,
1663 actual: uncompressed,
1664 limit: opts.render.memory.max_entry_bytes,
1665 path: Some(zip_path),
1666 });
1667 }
1668
1669 if uncompressed > chapter_buf.capacity() {
1671 return Err(EpubError::BufferTooSmall {
1672 required: uncompressed,
1673 provided: chapter_buf.capacity(),
1674 context: "chapter_buf".to_string(),
1675 });
1676 }
1677
1678 let use_entry = CdEntry {
1680 filename: String::with_capacity(0),
1681 method: entry.method,
1682 compressed_size: entry.compressed_size,
1683 uncompressed_size: entry.uncompressed_size,
1684 local_header_offset: entry.local_header_offset,
1685 crc32: entry.crc32,
1686 };
1687
1688 if chapter_buf.len() < uncompressed {
1693 chapter_buf.resize(uncompressed, 0);
1694 }
1695 if scratch.read_buf.is_empty() {
1696 let read_chunk = scratch.read_buf.capacity().max(1024);
1699 scratch.read_buf.resize(read_chunk, 0);
1700 }
1701 let bytes_read = self
1702 .zip
1703 .read_file_with_scratch(
1704 &use_entry,
1705 chapter_buf.as_mut_slice(),
1706 &mut scratch.read_buf,
1707 )
1708 .map_err(EpubError::Zip)?;
1709 chapter_buf.truncate(bytes_read);
1710
1711 let mut emitted = 0usize;
1712 let mut callback_err: Option<EpubError> = None;
1713 let mut prep = RenderPrep::new(opts.render).with_serif_default();
1714 prep.prepare_chapter_bytes_with_scratch(
1715 self,
1716 index,
1717 chapter_buf,
1718 &mut scratch.read_buf,
1719 |item| {
1720 if callback_err.is_some() || emitted >= opts.max_items {
1721 return;
1722 }
1723 if let Err(e) = on_item(item) {
1724 callback_err = Some(e);
1725 return;
1726 }
1727 emitted += 1;
1728 },
1729 )
1730 .map_err(EpubError::from)?;
1731
1732 if let Some(err) = callback_err {
1733 return Err(err);
1734 }
1735 if emitted >= opts.max_items {
1736 return Err(EpubError::LimitExceeded {
1737 kind: LimitKind::EventCount,
1738 actual: emitted,
1739 limit: opts.max_items,
1740 path: Some(zip_path),
1741 });
1742 }
1743
1744 Ok(ChapterStreamResult {
1745 items_emitted: emitted,
1746 bytes_read: chapter_buf.len(),
1747 complete: true,
1748 })
1749 }
1750
1751 pub fn chapter_text(&mut self, index: usize) -> Result<String, EpubError> {
1761 let mut out = String::with_capacity(0);
1762 self.chapter_text_into(index, &mut out)?;
1763 Ok(out)
1764 }
1765
1766 pub fn chapter_text_into(&mut self, index: usize, out: &mut String) -> Result<(), EpubError> {
1776 self.chapter_text_into_with_limit(index, usize::MAX, out)
1777 }
1778
1779 pub fn chapter_text_with_limit(
1783 &mut self,
1784 index: usize,
1785 max_bytes: usize,
1786 ) -> Result<String, EpubError> {
1787 let mut out = String::with_capacity(0);
1788 self.chapter_text_into_with_limit(index, max_bytes, &mut out)?;
1789 Ok(out)
1790 }
1791
1792 pub fn chapter_text_into_with_limit(
1796 &mut self,
1797 index: usize,
1798 max_bytes: usize,
1799 out: &mut String,
1800 ) -> Result<(), EpubError> {
1801 out.clear();
1802 if max_bytes == 0 {
1803 return Ok(());
1804 }
1805
1806 let chapter = self.chapter(index)?;
1807 let bytes = self.read_resource(&chapter.href)?;
1808 extract_plain_text_limited(&bytes, max_bytes, out)
1809 }
1810
1811 pub fn tokenize_spine_item(&mut self, index: usize) -> Result<Vec<Token>, EpubError> {
1822 let chapter = self.chapter(index)?;
1823 let bytes = self.read_resource(&chapter.href)?;
1824 let html =
1825 str::from_utf8(&bytes).map_err(|_| EpubError::ChapterNotUtf8 { href: chapter.href })?;
1826 tokenize_html(html).map_err(EpubError::from)
1827 }
1828
1829 pub fn read_spine_chapter(&mut self, index: usize) -> Result<Vec<u8>, EpubError> {
1831 self.read_spine_item_bytes(index)
1832 }
1833
1834 pub fn tokenize_spine_chapter(&mut self, index: usize) -> Result<Vec<Token>, EpubError> {
1836 self.tokenize_spine_item(index)
1837 }
1838
1839 fn ensure_embedded_fonts_loaded(&mut self) -> Result<&Vec<EmbeddedFontFace>, EpubError> {
1840 if self.embedded_fonts_cache.is_none() {
1841 let css_hrefs: Vec<String> = self
1842 .metadata
1843 .manifest
1844 .iter()
1845 .filter(|item| item.media_type == "text/css")
1846 .map(|item| item.href.clone())
1847 .collect();
1848 let mut out = Vec::with_capacity(0);
1849 for href in css_hrefs {
1850 let bytes = self.read_resource(&href)?;
1851 let css = String::from_utf8(bytes)
1852 .map_err(|_| EpubError::Parse(format!("Stylesheet is not UTF-8: {}", href)))?;
1853 out.extend(parse_font_faces_from_css(&href, &css));
1854 }
1855 self.embedded_fonts_cache = Some(out);
1856 }
1857 self.embedded_fonts_cache
1858 .as_ref()
1859 .ok_or_else(|| EpubError::Parse("Embedded font cache initialization failed".into()))
1860 }
1861}
1862
1863impl EpubBook<File> {
1864 pub fn builder() -> EpubBookBuilder {
1866 EpubBookBuilder::new()
1867 }
1868}
1869
1870fn load_summary_from_zip<R: Read + Seek>(
1871 zip: &mut StreamingZip<R>,
1872 options: EpubBookOptions,
1873) -> Result<EpubSummary, EpubError> {
1874 zip.validate_mimetype().map_err(EpubError::Zip)?;
1875 let container = read_entry(zip, "META-INF/container.xml")?;
1876 let opf_path = crate::metadata::parse_container_xml(&container)?;
1877 let opf = read_entry(zip, &opf_path)?;
1878 let metadata = extract_metadata(&container, &opf)?;
1879 let spine = crate::spine::parse_spine(&opf)?;
1880 validate_open_invariants(&metadata, &spine, options.validation_mode)?;
1881 let navigation = parse_navigation(
1882 zip,
1883 &metadata,
1884 &spine,
1885 &opf_path,
1886 options.validation_mode,
1887 options.max_nav_bytes,
1888 )?;
1889
1890 Ok(EpubSummary {
1891 metadata,
1892 spine,
1893 navigation,
1894 })
1895}
1896
1897fn parse_navigation<R: Read + Seek>(
1898 zip: &mut StreamingZip<R>,
1899 metadata: &EpubMetadata,
1900 spine: &Spine,
1901 opf_path: &str,
1902 validation_mode: ValidationMode,
1903 max_nav_bytes: Option<usize>,
1904) -> Result<Option<Navigation>, EpubError> {
1905 let nav_item = spine
1906 .toc_id()
1907 .and_then(|toc_id| metadata.get_item(toc_id))
1908 .or_else(|| {
1909 metadata.manifest.iter().find(|item| {
1910 item.properties
1911 .as_deref()
1912 .is_some_and(|p| p.split_whitespace().any(|prop| prop == "nav"))
1913 })
1914 })
1915 .or_else(|| {
1916 metadata.manifest.iter().find(|item| {
1917 item.media_type == "application/x-dtbncx+xml"
1918 || item.href.to_ascii_lowercase().ends_with(".ncx")
1919 })
1920 });
1921
1922 let Some(nav_item) = nav_item else {
1923 return Ok(None);
1924 };
1925
1926 let nav_path = resolve_opf_relative_path(opf_path, &nav_item.href);
1927 let nav_bytes = match read_entry(zip, &nav_path) {
1928 Ok(bytes) => bytes,
1929 Err(err) => {
1930 if matches!(validation_mode, ValidationMode::Strict) {
1931 return Err(err);
1932 }
1933 log::warn!("Failed to read navigation document '{}': {}", nav_path, err);
1934 return Ok(None);
1935 }
1936 };
1937
1938 if let Some(limit) = max_nav_bytes {
1939 if nav_bytes.len() > limit {
1940 return Err(EpubError::Phase(PhaseError {
1941 phase: ErrorPhase::Open,
1942 code: "NAV_BYTES_LIMIT",
1943 message: format!(
1944 "Navigation bytes exceed configured max_nav_bytes ({} > {})",
1945 nav_bytes.len(),
1946 limit
1947 )
1948 .into_boxed_str(),
1949 context: Some(Box::new(PhaseErrorContext {
1950 source: None,
1951 path: Some(nav_path.clone().into_boxed_str()),
1952 href: Some(nav_item.href.clone().into_boxed_str()),
1953 chapter_index: None,
1954 selector: None,
1955 selector_index: None,
1956 declaration: None,
1957 declaration_index: None,
1958 token_offset: None,
1959 limit: Some(Box::new(ErrorLimitContext::new(
1960 "max_nav_bytes",
1961 nav_bytes.len(),
1962 limit,
1963 ))),
1964 })),
1965 }));
1966 }
1967 }
1968
1969 let parsed = if nav_item.media_type == "application/x-dtbncx+xml"
1970 || nav_item.href.to_ascii_lowercase().ends_with(".ncx")
1971 {
1972 parse_ncx(&nav_bytes)
1973 } else {
1974 parse_nav_xhtml(&nav_bytes)
1975 };
1976
1977 match parsed {
1978 Ok(nav) => Ok(Some(nav)),
1979 Err(err) => {
1980 if matches!(validation_mode, ValidationMode::Strict) {
1981 Err(EpubError::Navigation(err.to_string()))
1982 } else {
1983 log::warn!(
1984 "Failed to parse navigation document '{}': {}",
1985 nav_path,
1986 err
1987 );
1988 Ok(None)
1989 }
1990 }
1991 }
1992}
1993
1994fn validate_open_invariants(
1995 metadata: &EpubMetadata,
1996 spine: &Spine,
1997 validation_mode: ValidationMode,
1998) -> Result<(), EpubError> {
1999 if matches!(validation_mode, ValidationMode::Lenient) {
2000 return Ok(());
2001 }
2002
2003 for item in spine.items() {
2004 if metadata.get_item(&item.idref).is_none() {
2005 return Err(EpubError::ManifestItemMissing {
2006 idref: item.idref.clone(),
2007 });
2008 }
2009 }
2010
2011 Ok(())
2012}
2013
2014fn read_entry<R: Read + Seek>(zip: &mut StreamingZip<R>, path: &str) -> Result<Vec<u8>, EpubError> {
2015 let mut buf = Vec::with_capacity(0);
2016 read_entry_into(zip, path, &mut buf)?;
2017 Ok(buf)
2018}
2019
2020fn read_entry_into<R: Read + Seek, W: Write>(
2021 zip: &mut StreamingZip<R>,
2022 path: &str,
2023 writer: &mut W,
2024) -> Result<usize, EpubError> {
2025 read_entry_into_with_limit(zip, path, writer, usize::MAX)
2026}
2027
2028fn read_entry_into_with_limit<R: Read + Seek, W: Write>(
2029 zip: &mut StreamingZip<R>,
2030 path: &str,
2031 writer: &mut W,
2032 max_bytes: usize,
2033) -> Result<usize, EpubError> {
2034 let (method, compressed_size, uncompressed_size, local_header_offset, crc32) = {
2035 let entry = zip
2036 .get_entry(path)
2037 .ok_or(EpubError::Zip(ZipError::FileNotFound))?;
2038 (
2039 entry.method,
2040 entry.compressed_size,
2041 entry.uncompressed_size,
2042 entry.local_header_offset,
2043 entry.crc32,
2044 )
2045 };
2046
2047 if uncompressed_size > max_bytes as u64 || compressed_size > max_bytes as u64 {
2048 return Err(EpubError::Zip(ZipError::FileTooLarge));
2049 }
2050 let entry = CdEntry {
2051 method,
2052 compressed_size,
2053 uncompressed_size,
2054 local_header_offset,
2055 crc32,
2056 filename: String::with_capacity(0),
2057 };
2058 zip.read_file_to_writer(&entry, writer)
2059 .map_err(EpubError::Zip)
2060}
2061
2062fn resolve_opf_relative_path(opf_path: &str, href: &str) -> String {
2063 let href = href.split('#').next().unwrap_or(href);
2064 if href.is_empty() {
2065 return normalize_path(opf_path);
2066 }
2067 if href.starts_with('/') {
2068 return normalize_path(href.trim_start_matches('/'));
2069 }
2070 if href.contains("://") {
2071 return href.to_string();
2072 }
2073
2074 let base_dir = opf_path.rsplit_once('/').map(|(dir, _)| dir).unwrap_or("");
2075 if base_dir.is_empty() {
2076 normalize_path(href)
2077 } else {
2078 normalize_path(&format!("{}/{}", base_dir, href))
2079 }
2080}
2081
/// Normalizes a `/`-separated path.
///
/// Empty segments and `.` are dropped, and `..` removes the previously kept
/// segment. A `..` with nothing left to remove is ignored. The result never has a
/// leading or trailing `/`.
fn normalize_path(path: &str) -> String {
    let kept = path
        .split('/')
        .fold(Vec::<&str>::new(), |mut stack, segment| {
            match segment {
                "" | "." => {}
                ".." => {
                    stack.pop();
                }
                other => stack.push(other),
            }
            stack
        });
    kept.join("/")
}
2095
/// Reports whether a resource looks like an (X)HTML document.
///
/// Returns `true` when the media type contains `html` (case-insensitive; this
/// also covers `xhtml`), or otherwise when the path ends in `.xhtml`, `.html` or
/// `.htm` (case-insensitive).
fn is_cover_document(media_type: Option<&str>, path: &str) -> bool {
    let html_media_type =
        media_type.is_some_and(|declared| declared.trim().to_ascii_lowercase().contains("html"));
    if html_media_type {
        return true;
    }

    let lowered = path.to_ascii_lowercase();
    [".xhtml", ".html", ".htm"]
        .iter()
        .any(|extension| lowered.ends_with(*extension))
}
2106
/// Guesses an image media type from the file extension of `path`.
///
/// The extension is whatever follows the last `.` and is compared
/// case-insensitively (ASCII). Returns `None` for a path without a `.` or with an
/// extension that is not in the table.
fn infer_image_media_type_from_path(path: &str) -> Option<&'static str> {
    const BY_EXTENSION: [(&str, &str); 8] = [
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("png", "image/png"),
        ("gif", "image/gif"),
        ("bmp", "image/bmp"),
        ("webp", "image/webp"),
        ("svg", "image/svg+xml"),
        ("svgz", "image/svg+xml"),
    ];

    let (_, extension) = path.rsplit_once('.')?;
    BY_EXTENSION
        .iter()
        .find(|(known, _)| extension.eq_ignore_ascii_case(known))
        .map(|&(_, media_type)| media_type)
}
2125
2126fn normalized_image_media_type(media_type: Option<&str>, path: &str) -> Option<String> {
2127 media_type
2128 .map(|value| value.trim().to_ascii_lowercase())
2129 .or_else(|| infer_image_media_type_from_path(path).map(ToString::to_string))
2130}
2131
2132fn is_supported_image_resource(
2133 media_type: Option<&str>,
2134 path: &str,
2135 options: ImageReadOptions,
2136) -> bool {
2137 let Some(media) = normalized_image_media_type(media_type, path) else {
2138 return options.allow_unknown_images;
2139 };
2140 if !media.starts_with("image/") {
2141 return false;
2142 }
2143 if !options.allow_svg && media == "image/svg+xml" {
2144 return false;
2145 }
2146 true
2147}
2148
/// Reports whether `name` is one of the element names whose content is skipped
/// during plain-text extraction. The comparison is exact and case-sensitive.
fn should_skip_text_tag(name: &str) -> bool {
    const SKIPPED_TAGS: [&str; 8] = [
        "script", "style", "head", "nav", "header", "footer", "aside", "noscript",
    ];
    SKIPPED_TAGS.iter().any(|tag| *tag == name)
}
2155
/// Collapses every run of whitespace into a single ASCII space and removes
/// leading and trailing whitespace.
///
/// Whitespace is defined by `char::is_whitespace`, which is also what
/// `str::split_whitespace` splits on.
fn normalize_plain_text_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    let mut words = text.split_whitespace();

    if let Some(first) = words.next() {
        collapsed.push_str(first);
        for word in words {
            collapsed.push(' ');
            collapsed.push_str(word);
        }
    }

    collapsed
}
2175
/// Appends as much of `value` to `out` as fits without `out` exceeding
/// `max_bytes` bytes, cutting only on a UTF-8 character boundary.
///
/// Returns `true` when `out` was already at the limit or `value` had to be cut,
/// and `false` when `value` was appended in full (or was empty while `out` was
/// below the limit).
fn push_limited(out: &mut String, value: &str, max_bytes: usize) -> bool {
    let already_full = out.len() >= max_bytes;
    if already_full || value.is_empty() {
        return already_full;
    }

    let room = max_bytes - out.len();
    if value.len() <= room {
        out.push_str(value);
        return false;
    }

    // Largest char boundary not beyond `room`; index 0 is always a boundary.
    let cut = (0..=room)
        .rev()
        .find(|&idx| value.is_char_boundary(idx))
        .unwrap_or(0);
    out.push_str(&value[..cut]);
    true
}
2194
2195fn push_newline_limited(out: &mut String, max_bytes: usize) -> bool {
2196 if out.is_empty() || out.ends_with('\n') {
2197 return false;
2198 }
2199 push_limited(out, "\n", max_bytes)
2200}
2201
2202fn push_text_limited(out: &mut String, text: &str, max_bytes: usize) -> bool {
2203 if text.is_empty() {
2204 return false;
2205 }
2206 if !out.is_empty() && !out.ends_with('\n') && push_limited(out, " ", max_bytes) {
2207 return true;
2208 }
2209 push_limited(out, text, max_bytes)
2210}
2211
/// Extracts plain text from (X)HTML bytes into `out`, stopping once `out` reaches
/// `max_bytes` bytes.
///
/// `out` is not cleared here; the limit applies to its total length. Content
/// inside elements matched by `should_skip_text_tag` is ignored. `p`, `div` and
/// `li` boundaries (and empty `br`) start a new line. Text, CDATA and entity
/// references are whitespace-normalized and appended with `push_text_limited`.
/// A single trailing newline is removed before returning.
///
/// # Errors
/// `EpubError::Parse` for XML reader errors, name/text decoding errors, and
/// entity references that `quick_xml::escape::unescape` cannot resolve.
fn extract_plain_text_limited(
    html: &[u8],
    max_bytes: usize,
    out: &mut String,
) -> Result<(), EpubError> {
    let mut reader = Reader::from_reader(html);
    // Keep text as-is (whitespace is normalized below) and report self-closing
    // elements as `Event::Empty` rather than Start/End pairs.
    reader.config_mut().trim_text(false);
    reader.config_mut().expand_empty_elements = false;

    let mut buf = Vec::with_capacity(0);
    // Nesting depth of currently open skipped elements; text is emitted only at 0.
    let mut skip_depth = 0usize;
    // Set as soon as any push reports that the byte limit was hit.
    let mut done = false;

    while !done {
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(e)) => {
                let name = reader
                    .decoder()
                    .decode(e.name().as_ref())
                    .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?
                    .to_string();
                if should_skip_text_tag(&name) {
                    skip_depth += 1;
                } else if skip_depth == 0
                    && matches!(name.as_str(), "p" | "div" | "li")
                    && push_newline_limited(out, max_bytes)
                {
                    done = true;
                }
            }
            Ok(Event::Empty(e)) => {
                if skip_depth > 0 {
                    // `continue` bypasses the clear at the end of the loop body.
                    buf.clear();
                    continue;
                }
                let name = reader
                    .decoder()
                    .decode(e.name().as_ref())
                    .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?
                    .to_string();
                if matches!(name.as_str(), "br" | "p" | "div" | "li")
                    && push_newline_limited(out, max_bytes)
                {
                    done = true;
                }
            }
            Ok(Event::End(e)) => {
                let name = reader
                    .decoder()
                    .decode(e.name().as_ref())
                    .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?
                    .to_string();
                if should_skip_text_tag(&name) {
                    // Saturating, so an unmatched closing tag cannot underflow.
                    skip_depth = skip_depth.saturating_sub(1);
                } else if skip_depth == 0
                    && matches!(name.as_str(), "p" | "div" | "li")
                    && push_newline_limited(out, max_bytes)
                {
                    done = true;
                }
            }
            Ok(Event::Text(e)) => {
                if skip_depth > 0 {
                    buf.clear();
                    continue;
                }
                let text = e
                    .decode()
                    .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?
                    .to_string();
                let normalized = normalize_plain_text_whitespace(&text);
                if push_text_limited(out, &normalized, max_bytes) {
                    done = true;
                }
            }
            Ok(Event::CData(e)) => {
                if skip_depth > 0 {
                    buf.clear();
                    continue;
                }
                let text = reader
                    .decoder()
                    .decode(&e)
                    .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?
                    .to_string();
                let normalized = normalize_plain_text_whitespace(&text);
                if push_text_limited(out, &normalized, max_bytes) {
                    done = true;
                }
            }
            Ok(Event::GeneralRef(e)) => {
                if skip_depth > 0 {
                    buf.clear();
                    continue;
                }
                // Rebuild the `&name;` form so the unescape helper can resolve it.
                let entity_name = e
                    .decode()
                    .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?;
                let entity = format!("&{};", entity_name);
                // NOTE(review): an entity that `unescape` does not recognise aborts the
                // whole extraction with a Parse error — confirm this is intended.
                let resolved = quick_xml::escape::unescape(&entity)
                    .map_err(|err| EpubError::Parse(format!("Unescape error: {:?}", err)))?
                    .to_string();
                let normalized = normalize_plain_text_whitespace(&resolved);
                if push_text_limited(out, &normalized, max_bytes) {
                    done = true;
                }
            }
            Ok(Event::Eof) => break,
            // Every other event kind is ignored.
            Ok(_) => {}
            Err(err) => return Err(EpubError::Parse(format!("XML error: {:?}", err))),
        }
        buf.clear();
    }

    // Drop a trailing line break left by the last block element.
    if out.ends_with('\n') {
        out.pop();
    }
    Ok(())
}
2331
2332#[cfg(test)]
2333mod tests {
2334 use super::*;
2335 use crate::render_prep::{RenderPrep, RenderPrepOptions, RenderPrepTrace, StyledEventOrRun};
2336
/// Relative, parent-relative, fragment-bearing and root-relative hrefs.
#[test]
fn test_resolve_opf_relative_path() {
    // (opf_path, href, expected archive path)
    let cases = [
        ("EPUB/package.opf", "text/ch1.xhtml", "EPUB/text/ch1.xhtml"),
        ("OEBPS/content.opf", "../toc.ncx", "toc.ncx"),
        ("package.opf", "chapter.xhtml#p1", "chapter.xhtml"),
        (
            "EPUB/package.opf",
            "/META-INF/container.xml",
            "META-INF/container.xml",
        ),
    ];

    for (opf_path, href, expected) in cases {
        assert_eq!(resolve_opf_relative_path(opf_path, href), expected);
    }
}
2356
/// Streaming a resource reports the same byte count that lands in the sink.
#[test]
fn test_read_resource_into_streams_to_writer() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let mut sink: Vec<u8> = Vec::new();
    let written = book
        .read_resource_into("xhtml/nav.xhtml", &mut sink)
        .expect("resource should stream");

    assert_eq!(written, sink.len());
    assert!(!sink.is_empty());
}
2372
/// An 8-byte hard cap is too small for the nav document and must fail.
#[test]
fn test_read_resource_into_with_hard_cap_errors_when_exceeded() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let mut sink: Vec<u8> = Vec::new();
    let outcome = book.read_resource_into_with_hard_cap("xhtml/nav.xhtml", &mut sink, 8);

    let err = outcome.expect_err("hard cap should fail");
    assert!(matches!(err, EpubError::Zip(ZipError::FileTooLarge)));
}
2387
/// A 1 MiB limit is large enough for the nav document.
#[test]
fn test_read_resource_into_with_limit_succeeds_when_under_cap() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let mut sink: Vec<u8> = Vec::new();
    let written = book
        .read_resource_into_with_limit("xhtml/nav.xhtml", &mut sink, 1024 * 1024)
        .expect("limit should allow nav payload");

    assert_eq!(written, sink.len());
    assert!(!sink.is_empty());
}
2403
/// The accessibility fixture resolves its cover through the manifest.
#[test]
fn test_cover_image_ref_fundamental_fixture() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let resolved = book
        .cover_image_ref()
        .expect("cover resolution should succeed");
    let cover = resolved.expect("cover should resolve");

    assert_eq!(cover.source, CoverImageSource::Manifest);
    assert_eq!(cover.media_type.as_deref(), Some("image/jpeg"));
    assert!(cover.zip_path.ends_with("images/cover.jpg"));
}
2420
/// The Frankenstein fixture resolves its cover through the manifest.
#[test]
fn test_cover_image_ref_frankenstein_fixture() {
    let fixture = std::fs::File::open("tests/fixtures/bench/pg84-frankenstein.epub")
        .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let resolved = book
        .cover_image_ref()
        .expect("cover resolution should succeed");
    let cover = resolved.expect("cover should resolve");

    assert_eq!(cover.source, CoverImageSource::Manifest);
    assert_eq!(cover.media_type.as_deref(), Some("image/jpeg"));
    assert!(cover.zip_path.ends_with("_cover.jpg"));
}
2435
/// A 1 KiB image cap rejects the fixture's cover image.
#[test]
fn test_read_cover_image_into_respects_limit() {
    let fixture = std::fs::File::open("tests/fixtures/bench/pg84-frankenstein.epub")
        .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let tiny_image_limit = ImageReadOptions {
        max_bytes: 1024,
        ..ImageReadOptions::default()
    };
    let cover_options = CoverImageOptions {
        image: tiny_image_limit,
        ..CoverImageOptions::default()
    };

    let mut sink: Vec<u8> = Vec::new();
    let err = book
        .read_cover_image_into_with_options(&mut sink, cover_options)
        .expect_err("oversized cover should fail under hard cap");
    assert!(matches!(err, EpubError::Zip(ZipError::FileTooLarge)));
}
2457
/// Reading an XHTML resource through the image API yields a parse error.
#[test]
fn test_read_image_resource_into_rejects_non_image_payload() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let mut sink: Vec<u8> = Vec::new();
    let err = book
        .read_image_resource_into_with_options(
            "xhtml/nav.xhtml",
            &mut sink,
            ImageReadOptions::default(),
        )
        .expect_err("non-image resources should be rejected");

    if let EpubError::Parse(msg) = &err {
        assert!(msg.contains("not an allowed image"));
    } else {
        panic!("expected parse error, got {:?}", err);
    }
}
2480
/// Opening with an 8-byte navigation cap fails with a `NAV_BYTES_LIMIT` phase
/// error that carries the limit context.
#[test]
fn test_open_enforces_max_nav_bytes_limit() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let capped_options = EpubBookOptions {
        max_nav_bytes: Some(8),
        ..EpubBookOptions::default()
    };

    let Err(err) = EpubBook::from_reader_with_options(fixture, capped_options) else {
        panic!("open should fail when navigation exceeds cap");
    };

    match err {
        EpubError::Phase(phase) => {
            assert_eq!(phase.code, "NAV_BYTES_LIMIT");
            let ctx = phase.context.expect("phase context should be present");
            let limit = ctx.limit.expect("limit context should be present");
            assert_eq!(limit.kind.as_ref(), "max_nav_bytes");
            assert_eq!(limit.limit, 8);
        }
        other => panic!("expected phase error, got {:?}", other),
    }
}
2508
/// With lazy navigation the nav is absent after open and present after
/// `ensure_navigation`.
#[test]
fn test_lazy_navigation_loaded_by_ensure_navigation() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let lazy_config = OpenConfig {
        options: EpubBookOptions::default(),
        lazy_navigation: true,
    };
    let mut book =
        EpubBook::from_reader_with_config(fixture, lazy_config).expect("book should open");

    assert!(book.navigation().is_none());

    let loaded = book
        .ensure_navigation()
        .expect("ensure navigation should parse");
    assert!(loaded.is_some());
}
2529
/// `chapter_text_into` produces the same text as `chapter_text`.
#[test]
fn test_chapter_text_into_matches_chapter_text() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let expected = book.chapter_text(0).expect("chapter text should extract");

    let mut actual = String::new();
    book.chapter_text_into(0, &mut actual)
        .expect("chapter text into should extract");

    assert_eq!(expected, actual);
}
2543
/// `chapter_html_into` produces the same markup as `chapter_html`.
#[test]
fn test_chapter_html_into_matches_chapter_html() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let expected = book.chapter_html(0).expect("chapter html should extract");

    let mut actual = String::new();
    book.chapter_html_into(0, &mut actual)
        .expect("chapter html into should extract");

    assert_eq!(expected, actual);
}
2558
/// An 8-byte cap on chapter HTML must fail with `FileTooLarge`.
#[test]
fn test_chapter_html_into_with_limit_enforces_cap() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let mut html = String::new();
    let outcome = book.chapter_html_into_with_limit(0, 8, &mut html);

    let err = outcome.expect_err("hard cap should fail");
    assert!(matches!(err, EpubError::Zip(ZipError::FileTooLarge)));
}
2573
/// Limited text is at most 64 bytes and is a prefix of the full text.
#[test]
fn test_chapter_text_with_limit_truncates_safely() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let full_text = book.chapter_text(0).expect("full text should extract");
    let truncated = book
        .chapter_text_with_limit(0, 64)
        .expect("limited text should extract");

    assert!(truncated.len() <= 64);
    assert!(full_text.starts_with(&truncated));
}
2588
/// A zero-byte limit yields an empty string.
#[test]
fn test_chapter_text_with_zero_limit_is_empty() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let text = book
        .chapter_text_with_limit(0, 0)
        .expect("limited text should extract");

    assert!(text.is_empty());
}
2601
/// Pre-existing buffer content is discarded before extraction.
#[test]
fn test_chapter_text_into_with_limit_clears_existing_buffer() {
    let fixture = std::fs::File::open(
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
    )
    .expect("fixture should open");
    let mut book = EpubBook::from_reader(fixture).expect("book should open");

    let mut reused = String::from("stale content");
    book.chapter_text_into_with_limit(0, 32, &mut reused)
        .expect("limited text should extract");

    assert!(!reused.starts_with("stale content"));
    assert!(reused.len() <= 32);
}
2615
/// Limits extraction to 8 bytes on input containing a multi-byte emoji and
/// checks the output stays within the limit and is valid UTF-8.
#[test]
fn test_extract_plain_text_limited_preserves_utf8_boundaries() {
    let markup = "<p>hello 😀 world</p>";
    let mut text = String::new();

    let extraction = extract_plain_text_limited(markup.as_bytes(), 8, &mut text);
    extraction.expect("extract should succeed");

    assert!(text.len() <= 8);
    // NOTE(review): a `String` is always valid UTF-8, so this check cannot fail
    // on its own; presumably the test mainly guards against a panic or error
    // when the limit falls inside the emoji — confirm against the extractor.
    assert!(core::str::from_utf8(text.as_bytes()).is_ok());
}
2624
/// `chapter_stylesheets` must succeed for chapter 0 and never report a
/// stylesheet source with an empty `href`.
#[test]
fn test_chapter_stylesheets_api_works() {
    let fixture_path =
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub";
    let reader = std::fs::File::open(fixture_path).expect("fixture should open");
    let mut epub = EpubBook::from_reader(reader).expect("book should open");

    let lookup = epub.chapter_stylesheets(0);
    let stylesheets = lookup.expect("chapter_stylesheets should succeed");

    let has_blank_href = stylesheets
        .sources
        .iter()
        .any(|source| source.href.is_empty());
    assert!(!has_blank_href);
}
2637
/// `styles_for_chapter` must return the same value as
/// `chapter_stylesheets_with_options` when given identical limits.
#[test]
fn test_styles_for_chapter_alias_matches_with_options() {
    let fixture_path =
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub";
    let reader = std::fs::File::open(fixture_path).expect("fixture should open");
    let mut epub = EpubBook::from_reader(reader).expect("book should open");

    let limits = StyleLimits::default();

    let via_options = epub.chapter_stylesheets_with_options(0, limits);
    let canonical = via_options.expect("chapter_stylesheets_with_options should succeed");

    let via_alias = epub.styles_for_chapter(0, limits);
    let aliased = via_alias.expect("styles_for_chapter should succeed");

    assert_eq!(canonical, aliased);
}
2654
/// `embedded_fonts` must succeed and return no more faces than the default
/// `FontLimits::max_faces`.
#[test]
fn test_embedded_fonts_api_works() {
    let fixture_path =
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub";
    let reader = std::fs::File::open(fixture_path).expect("fixture should open");
    let mut epub = EpubBook::from_reader(reader).expect("book should open");

    let lookup = epub.embedded_fonts();
    let faces = lookup.expect("embedded_fonts should succeed");

    let face_count = faces.len();
    let face_cap = crate::render_prep::FontLimits::default().max_faces;
    assert!(face_count <= face_cap);
}
2667
/// `embedded_fonts_with_limits` must return the same value as
/// `embedded_fonts_with_options` when given identical limits.
#[test]
fn test_embedded_fonts_with_limits_alias_matches_with_options() {
    let fixture_path =
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub";
    let reader = std::fs::File::open(fixture_path).expect("fixture should open");
    let mut epub = EpubBook::from_reader(reader).expect("book should open");

    let limits = FontLimits::default();

    let via_options = epub.embedded_fonts_with_options(limits);
    let canonical = via_options.expect("embedded_fonts_with_options should succeed");

    let via_alias = epub.embedded_fonts_with_limits(limits);
    let aliased = via_alias.expect("embedded_fonts_with_limits should succeed");

    assert_eq!(canonical, aliased);
}
2684
/// Registers the book's embedded fonts on a serif-default `RenderPrep`, then
/// prepares the first chapter with non-blank text (chapter 0 if none is found)
/// and expects at least one item in the result.
#[test]
fn test_render_prep_golden_path_prepare_chapter() {
    let fixture_path =
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub";
    let reader = std::fs::File::open(fixture_path).expect("fixture should open");
    let mut epub = EpubBook::from_reader(reader).expect("book should open");

    let mut render_prep = RenderPrep::new(RenderPrepOptions::default())
        .with_serif_default()
        .with_embedded_fonts_from_book(&mut epub)
        .expect("font registration should succeed");

    // Pick the first chapter whose first 256 bytes of text are not blank;
    // extraction errors count as "no text".
    let mut index = 0;
    for candidate in 0..epub.chapter_count() {
        let has_text = epub
            .chapter_text_with_limit(candidate, 256)
            .map_or(false, |text| !text.trim().is_empty());
        if has_text {
            index = candidate;
            break;
        }
    }

    let prepared = render_prep
        .prepare_chapter(&mut epub, index)
        .expect("prepare_chapter should succeed");
    let item_count = prepared.iter().count();
    assert!(item_count > 0);
}
2708
/// `chapter_styled_runs` must yield at least one item for the first chapter
/// with non-blank text (chapter 0 if none is found).
#[test]
fn test_chapter_styled_runs_api_returns_items() {
    let fixture_path =
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub";
    let reader = std::fs::File::open(fixture_path).expect("fixture should open");
    let mut epub = EpubBook::from_reader(reader).expect("book should open");

    // Extraction errors are treated the same as a blank chapter.
    let index = (0..epub.chapter_count())
        .find(|&candidate| {
            epub.chapter_text_with_limit(candidate, 256)
                .map_or(false, |text| !text.trim().is_empty())
        })
        .unwrap_or(0);

    let lookup = epub.chapter_styled_runs(index);
    let styled_chapter = lookup.expect("chapter_styled_runs should succeed");
    let item_count = styled_chapter.iter().count();
    assert!(item_count > 0);
}
2728
/// `chapter_events` must invoke the callback once per reported item and
/// report at least one item for a chapter with non-blank text.
#[test]
fn test_chapter_events_streaming_emits_items() {
    let fixture_path =
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub";
    let reader = std::fs::File::open(fixture_path).expect("fixture should open");
    let mut epub = EpubBook::from_reader(reader).expect("book should open");

    // First chapter with non-blank text, falling back to chapter 0.
    let mut index = 0;
    for candidate in 0..epub.chapter_count() {
        let has_text = epub
            .chapter_text_with_limit(candidate, 256)
            .map_or(false, |text| !text.trim().is_empty());
        if has_text {
            index = candidate;
            break;
        }
    }

    // Count callback invocations independently of the returned total.
    let mut callback_hits = 0usize;
    let options = ChapterEventsOptions::default();
    let reported = epub
        .chapter_events(index, options, |_| {
            callback_hits += 1;
            Ok(())
        })
        .expect("chapter_events should succeed");

    assert_eq!(reported, callback_hits);
    assert!(reported > 0);
}
2754
/// With `max_items` set to 1, streaming a chapter with non-blank text is
/// expected to fail with an `EpubError::Parse` error.
#[test]
fn test_chapter_events_respects_max_items_cap() {
    let fixture_path =
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub";
    let reader = std::fs::File::open(fixture_path).expect("fixture should open");
    let mut epub = EpubBook::from_reader(reader).expect("book should open");

    // First chapter with non-blank text, falling back to chapter 0.
    let index = (0..epub.chapter_count())
        .find(|&candidate| {
            epub.chapter_text_with_limit(candidate, 256)
                .map_or(false, |text| !text.trim().is_empty())
        })
        .unwrap_or(0);

    // Defaults everywhere except the item cap.
    let capped_options = ChapterEventsOptions {
        max_items: 1,
        ..ChapterEventsOptions::default()
    };
    let outcome = epub.chapter_events(index, capped_options, |_| Ok(()));
    let failure = outcome.expect_err("max_items cap should fail");
    assert!(matches!(failure, EpubError::Parse(_)));
}
2782
/// `prepare_chapter_into` must push at least one item into the caller's
/// vector for a chapter with non-blank text.
#[test]
fn test_render_prep_prepare_chapter_into_streams_items() {
    let fixture_path =
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub";
    let reader = std::fs::File::open(fixture_path).expect("fixture should open");
    let mut epub = EpubBook::from_reader(reader).expect("book should open");

    // First chapter with non-blank text, falling back to chapter 0.
    let mut index = 0;
    for candidate in 0..epub.chapter_count() {
        let has_text = epub
            .chapter_text_with_limit(candidate, 256)
            .map_or(false, |text| !text.trim().is_empty());
        if has_text {
            index = candidate;
            break;
        }
    }

    let mut render_prep = RenderPrep::new(RenderPrepOptions::default())
        .with_serif_default()
        .with_embedded_fonts_from_book(&mut epub)
        .expect("font registration should succeed");

    let mut items = Vec::new();
    render_prep
        .prepare_chapter_into(&mut epub, index, &mut items)
        .expect("prepare_chapter_into should succeed");
    assert!(!items.is_empty());
}
2806
/// Every styled run must carry the same font id and resolved family as the
/// font trace reported alongside it, and at least one run must be seen.
#[test]
fn test_render_prep_runs_persist_resolved_font_id() {
    let fixture_path =
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub";
    let reader = std::fs::File::open(fixture_path).expect("fixture should open");
    let mut epub = EpubBook::from_reader(reader).expect("book should open");

    // First chapter with non-blank text, falling back to chapter 0.
    let index = (0..epub.chapter_count())
        .find(|&candidate| {
            epub.chapter_text_with_limit(candidate, 256)
                .map_or(false, |text| !text.trim().is_empty())
        })
        .unwrap_or(0);

    let mut render_prep = RenderPrep::new(RenderPrepOptions::default())
        .with_serif_default()
        .with_embedded_fonts_from_book(&mut epub)
        .expect("font registration should succeed");

    let mut run_observed = false;
    render_prep
        .prepare_chapter_with_trace_context(&mut epub, index, |item, trace| match item {
            StyledEventOrRun::Run(run) => {
                run_observed = true;
                let traced_font = trace.font_trace().expect("run should include font trace");
                assert_eq!(run.font_id, traced_font.face.font_id);
                assert_eq!(run.resolved_family, traced_font.face.family);
            }
            // Only runs are checked by this test.
            _ => {}
        })
        .expect("prepare_chapter_with_trace_context should succeed");
    assert!(run_observed);
}
2838
/// Runs must be paired with a `RenderPrepTrace::Run` whose style and font
/// agree with the run; events and images must be paired with
/// `RenderPrepTrace::Event`. At least one run must be seen.
#[test]
fn test_render_prep_trace_context_contains_font_and_style_for_runs() {
    let fixture_path =
        "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub";
    let reader = std::fs::File::open(fixture_path).expect("fixture should open");
    let mut epub = EpubBook::from_reader(reader).expect("book should open");

    // First chapter with non-blank text, falling back to chapter 0.
    let mut index = 0;
    for candidate in 0..epub.chapter_count() {
        let has_text = epub
            .chapter_text_with_limit(candidate, 256)
            .map_or(false, |text| !text.trim().is_empty());
        if has_text {
            index = candidate;
            break;
        }
    }

    let mut render_prep = RenderPrep::new(RenderPrepOptions::default())
        .with_serif_default()
        .with_embedded_fonts_from_book(&mut epub)
        .expect("font registration should succeed");

    let mut run_observed = false;
    render_prep
        .prepare_chapter_with_trace_context(&mut epub, index, |item, trace| match item {
            StyledEventOrRun::Run(run) => {
                run_observed = true;
                if let RenderPrepTrace::Run { style, font } = trace {
                    assert_eq!(style.as_ref(), &run.style);
                    assert_eq!(font.face.font_id, run.font_id);
                    assert_eq!(font.face.family, run.resolved_family);
                } else {
                    panic!("run item should produce run trace context");
                }
            }
            // Non-run items share the same expectation.
            StyledEventOrRun::Event(_) | StyledEventOrRun::Image(_) => {
                assert!(matches!(trace, RenderPrepTrace::Event));
            }
        })
        .expect("prepare_chapter_with_trace_context should succeed");
    assert!(run_observed);
}
2881
/// Resolving `Locator::TocId("intro")` against a two-chapter session must land
/// on chapter index 1 with fragment `start`, and book progress must then be
/// greater than zero.
#[test]
fn test_reading_session_resolve_locator_and_progress() {
    let first_chapter = ChapterRef {
        index: 0,
        idref: String::from("c1"),
        href: String::from("text/ch1.xhtml"),
        media_type: String::from("application/xhtml+xml"),
    };
    let second_chapter = ChapterRef {
        index: 1,
        idref: String::from("c2"),
        href: String::from("text/ch2.xhtml"),
        media_type: String::from("application/xhtml+xml"),
    };

    // Single TOC entry pointing at a fragment inside the second chapter.
    let intro_entry = NavPoint {
        label: String::from("intro"),
        href: String::from("text/ch2.xhtml#start"),
        children: Vec::new(),
    };
    let navigation = Navigation {
        toc: vec![intro_entry],
        page_list: Vec::new(),
        landmarks: Vec::new(),
    };

    let mut session = ReadingSession::new(vec![first_chapter, second_chapter], Some(navigation));
    let target = Locator::TocId(String::from("intro"));
    let resolved = session
        .resolve_locator(target)
        .expect("toc id should resolve");

    assert_eq!(resolved.chapter.index, 1);
    assert_eq!(resolved.fragment.as_deref(), Some("start"));
    assert!(session.book_progress() > 0.0);
}
2915
/// Seeking to chapter index 2 in a single-chapter session must fail with
/// `EpubError::ChapterOutOfBounds`.
#[test]
fn test_reading_session_seek_position_out_of_bounds() {
    let only_chapter = ChapterRef {
        index: 0,
        idref: String::from("c1"),
        href: String::from("text/ch1.xhtml"),
        media_type: String::from("application/xhtml+xml"),
    };
    let mut session = ReadingSession::new(vec![only_chapter], None);

    // Chapter index 2 does not exist in a one-chapter session.
    let target = ReadingPosition {
        chapter_index: 2,
        chapter_href: None,
        anchor: None,
        fallback_offset: 0,
    };
    let outcome = session.seek_position(&target);
    let failure = outcome.expect_err("seek should fail");
    assert!(matches!(failure, EpubError::ChapterOutOfBounds { .. }));
}
2935}