Skip to main content

epub_stream/
book.rs

1//! High-level EPUB API for common workflows.
2//!
3//! This module provides a convenience wrapper around the lower-level parsers.
4//! It is intended for the common "open EPUB -> inspect metadata -> read chapters"
5//! flow.
6
7extern crate alloc;
8
9use alloc::format;
10use alloc::string::{String, ToString};
11use alloc::vec::Vec;
12use core::str;
13use quick_xml::events::Event;
14use quick_xml::reader::Reader;
15use std::fs::File;
16use std::io::{Read, Seek, Write};
17use std::path::Path;
18
19use crate::error::{
20    EpubError, ErrorLimitContext, ErrorPhase, LimitKind, PhaseError, PhaseErrorContext, ZipError,
21};
22use crate::metadata::{extract_metadata, EpubMetadata};
23use crate::navigation::{parse_nav_xhtml, parse_ncx, NavPoint, Navigation};
24use crate::render_prep::{
25    parse_font_faces_from_css, parse_stylesheet_links, ChapterStylesheets, EmbeddedFontFace,
26    FontLimits, RenderPrep, RenderPrepOptions, StyleLimits, StyledChapter, StyledEventOrRun,
27    StylesheetSource,
28};
29use crate::spine::Spine;
30
31use crate::tokenizer::{tokenize_html, Token};
32use crate::zip::{CdEntry, StreamingZip, ZipLimits};
33
/// How strictly the high-level open/parse entry points validate an EPUB.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ValidationMode {
    /// Tolerate partial or quirky EPUBs on a best-effort basis (the default).
    #[default]
    Lenient,
    /// Reject structural inconsistencies as early as possible.
    Strict,
}
44
45/// High-level configuration for opening EPUB books.
46#[derive(Clone, Copy, Debug, PartialEq, Eq)]
47pub struct EpubBookOptions {
48    /// Optional ZIP safety limits used while reading archive entries.
49    ///
50    /// When `None`, no explicit file-size caps are enforced by this crate.
51    pub zip_limits: Option<ZipLimits>,
52    /// Validation strictness for high-level parse/open behavior.
53    pub validation_mode: ValidationMode,
54    /// Optional cap for navigation payload bytes.
55    pub max_nav_bytes: Option<usize>,
56}
57
58impl Default for EpubBookOptions {
59    fn default() -> Self {
60        Self {
61            zip_limits: None,
62            validation_mode: ValidationMode::Lenient,
63            max_nav_bytes: None,
64        }
65    }
66}
67
68/// Compatibility open configuration for embedded-facing APIs.
69#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
70pub struct OpenConfig {
71    /// Baseline high-level open options.
72    pub options: EpubBookOptions,
73    /// When enabled, navigation parsing is deferred until `ensure_navigation`.
74    pub lazy_navigation: bool,
75}
76
77impl From<EpubBookOptions> for OpenConfig {
78    fn from(options: EpubBookOptions) -> Self {
79        Self {
80            options,
81            lazy_navigation: false,
82        }
83    }
84}
85
86/// Streaming chapter-event options for bounded extraction.
87#[derive(Clone, Copy, Debug, PartialEq)]
88pub struct ChapterEventsOptions {
89    /// Render-prep options used to produce event/run stream.
90    pub render: RenderPrepOptions,
91    /// Hard cap on emitted items.
92    pub max_items: usize,
93}
94
95impl Default for ChapterEventsOptions {
96    fn default() -> Self {
97        Self {
98            render: RenderPrepOptions::default(),
99            max_items: 131_072,
100        }
101    }
102}
103
/// Limits and acceptance rules for bounded reads of image resources.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ImageReadOptions {
    /// Hard upper bound on the image payload size, in bytes.
    pub max_bytes: usize,
    /// Accept SVG images when `true`.
    pub allow_svg: bool,
    /// Accept unknown / extension-only image resources when `true`.
    pub allow_unknown_images: bool,
}
114
115impl Default for ImageReadOptions {
116    fn default() -> Self {
117        Self {
118            max_bytes: 2 * 1024 * 1024,
119            allow_svg: true,
120            allow_unknown_images: false,
121        }
122    }
123}
124
125/// Strategy options for EPUB cover image discovery.
126#[derive(Clone, Copy, Debug, PartialEq, Eq)]
127pub struct CoverImageOptions {
128    /// Whether to prefer manifest-declared cover items before guide refs.
129    pub prefer_manifest_cover: bool,
130    /// Whether to inspect EPUB2 `<guide type="cover">` refs.
131    pub include_guide_refs: bool,
132    /// Whether to parse XHTML/HTML cover documents for nested `<img>`/`<svg:image>` refs.
133    pub parse_cover_documents: bool,
134    /// Hard cap for cover-document XHTML payload bytes.
135    pub max_cover_document_bytes: usize,
136    /// Image payload read options.
137    pub image: ImageReadOptions,
138}
139
140impl Default for CoverImageOptions {
141    fn default() -> Self {
142        Self {
143            prefer_manifest_cover: true,
144            include_guide_refs: true,
145            parse_cover_documents: true,
146            max_cover_document_bytes: 256 * 1024,
147            image: ImageReadOptions::default(),
148        }
149    }
150}
151
/// Origin of a resolved cover image reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CoverImageSource {
    /// Found via the manifest (`cover-image` / cover-id resource).
    Manifest,
    /// Found via an EPUB2 `<guide type="cover">` resource.
    Guide,
    /// Found as a nested image inside a cover XHTML/HTML document.
    CoverDocument,
}
162
163/// Resolved cover image metadata.
164#[derive(Clone, Debug, PartialEq, Eq)]
165pub struct CoverImageRef {
166    /// Canonical ZIP path for the resolved resource.
167    pub zip_path: String,
168    /// OPF-relative href when available in manifest; otherwise the resolved path.
169    pub href: String,
170    /// Declared or inferred media type.
171    pub media_type: Option<String>,
172    /// Discovery source.
173    pub source: CoverImageSource,
174}
175
176/// Options for streaming chapter event processing without full materialization.
177///
178/// This provides true streaming from ZIP with configurable chunk sizes and limits.
179#[derive(Clone, Debug)]
180pub struct StreamingChapterOptions {
181    /// Render-prep options for styling.
182    pub render_prep: RenderPrepOptions,
183    /// Hard cap on emitted items.
184    pub max_items: usize,
185    /// Maximum chapter entry size in bytes.
186    pub max_entry_bytes: usize,
187    /// Chunk size limits for incremental processing.
188    pub chunk_limits: Option<crate::streaming::ChunkLimits>,
189    /// Whether to extract stylesheets (requires additional reads).
190    pub load_stylesheets: bool,
191}
192
193impl Default for StreamingChapterOptions {
194    fn default() -> Self {
195        Self {
196            render_prep: RenderPrepOptions::default(),
197            max_items: 131_072,
198            max_entry_bytes: 4 * 1024 * 1024, // 4MB default
199            chunk_limits: None,               // Use defaults
200            load_stylesheets: false,          // Skip stylesheets for speed
201        }
202    }
203}
204
205impl StreamingChapterOptions {
206    /// Create embedded-friendly options with small chunks.
207    pub fn embedded() -> Self {
208        Self {
209            render_prep: RenderPrepOptions::default(),
210            max_items: 10_000,
211            max_entry_bytes: 512 * 1024, // 512KB max
212            chunk_limits: Some(crate::streaming::ChunkLimits::embedded()),
213            load_stylesheets: false,
214        }
215    }
216
217    /// Set explicit chunk limits.
218    pub fn with_chunk_limits(mut self, limits: crate::streaming::ChunkLimits) -> Self {
219        self.chunk_limits = Some(limits);
220        self
221    }
222
223    /// Enable/disable stylesheet loading.
224    pub fn with_stylesheets(mut self, load: bool) -> Self {
225        self.load_stylesheets = load;
226        self
227    }
228}
229
/// Outcome of a streaming chapter-event run.
#[derive(Debug, Clone)]
pub struct ChapterStreamResult {
    /// How many items were emitted.
    pub items_emitted: usize,
    /// How many bytes were read from the chapter in total.
    pub bytes_read: usize,
    /// `true` once streaming has finished.
    pub complete: bool,
}
240
241/// Builder for ergonomic high-level EPUB opening/parsing.
242#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
243pub struct EpubBookBuilder {
244    options: EpubBookOptions,
245}
246
247impl EpubBookBuilder {
248    /// Create a new builder with no explicit limits.
249    pub fn new() -> Self {
250        Self::default()
251    }
252
253    /// Set explicit ZIP limits.
254    pub fn with_zip_limits(mut self, limits: ZipLimits) -> Self {
255        self.options.zip_limits = Some(limits);
256        self
257    }
258
259    /// Enable strict validation mode.
260    pub fn strict(mut self) -> Self {
261        self.options.validation_mode = ValidationMode::Strict;
262        self
263    }
264
265    /// Set explicit validation mode.
266    pub fn validation_mode(mut self, mode: ValidationMode) -> Self {
267        self.options.validation_mode = mode;
268        self
269    }
270
271    /// Set an explicit navigation payload byte cap.
272    pub fn with_max_nav_bytes(mut self, max_nav_bytes: usize) -> Self {
273        self.options.max_nav_bytes = Some(max_nav_bytes);
274        self
275    }
276
277    /// Open an EPUB from a file path.
278    pub fn open<P: AsRef<Path>>(self, path: P) -> Result<EpubBook<File>, EpubError> {
279        EpubBook::open_with_options(path, self.options)
280    }
281
282    /// Open an EPUB from an arbitrary reader.
283    pub fn from_reader<R: Read + Seek>(self, reader: R) -> Result<EpubBook<R>, EpubError> {
284        EpubBook::from_reader_with_options(reader, self.options)
285    }
286
287    /// Parse summary metadata from a file path.
288    pub fn parse_file<P: AsRef<Path>>(self, path: P) -> Result<EpubSummary, EpubError> {
289        parse_epub_file_with_options(path, self.options)
290    }
291
292    /// Parse summary metadata from an arbitrary reader.
293    pub fn parse_reader<R: Read + Seek>(self, reader: R) -> Result<EpubSummary, EpubError> {
294        parse_epub_reader_with_options(reader, self.options)
295    }
296}
297
298/// Parsed top-level EPUB data for lightweight usage.
299#[derive(Clone, Debug, PartialEq, Eq)]
300pub struct EpubSummary {
301    metadata: EpubMetadata,
302    spine: Spine,
303    navigation: Option<Navigation>,
304}
305
306impl EpubSummary {
307    /// EPUB package metadata.
308    pub fn metadata(&self) -> &EpubMetadata {
309        &self.metadata
310    }
311
312    /// Reading order from `<spine>`.
313    pub fn spine(&self) -> &Spine {
314        &self.spine
315    }
316
317    /// Parsed navigation document, when one is available.
318    pub fn navigation(&self) -> Option<&Navigation> {
319        self.navigation.as_ref()
320    }
321}
322
323/// Parse an EPUB from any `Read + Seek` source.
324pub fn parse_epub_reader<R: Read + Seek>(reader: R) -> Result<EpubSummary, EpubError> {
325    parse_epub_reader_with_options(reader, EpubBookOptions::default())
326}
327
328/// Parse an EPUB from any `Read + Seek` source with explicit options.
329pub fn parse_epub_reader_with_options<R: Read + Seek>(
330    reader: R,
331    options: EpubBookOptions,
332) -> Result<EpubSummary, EpubError> {
333    let mut zip =
334        StreamingZip::new_with_limits(reader, options.zip_limits).map_err(EpubError::Zip)?;
335    load_summary_from_zip(&mut zip, options)
336}
337
338/// Parse an EPUB from a file path.
339pub fn parse_epub_file<P: AsRef<Path>>(path: P) -> Result<EpubSummary, EpubError> {
340    parse_epub_file_with_options(path, EpubBookOptions::default())
341}
342
343/// Parse an EPUB from a file path with explicit options.
344pub fn parse_epub_file_with_options<P: AsRef<Path>>(
345    path: P,
346    options: EpubBookOptions,
347) -> Result<EpubSummary, EpubError> {
348    let file = File::open(path).map_err(|e| EpubError::Io(e.to_string()))?;
349    parse_epub_reader_with_options(file, options)
350}
351
352/// High-level EPUB handle backed by an open ZIP reader.
353pub struct EpubBook<R: Read + Seek> {
354    zip: StreamingZip<R>,
355    opf_path: String,
356    metadata: EpubMetadata,
357    spine: Spine,
358    validation_mode: ValidationMode,
359    max_nav_bytes: Option<usize>,
360    navigation_loaded: bool,
361    navigation: Option<Navigation>,
362    embedded_fonts_cache: Option<Vec<EmbeddedFontFace>>,
363}
364
/// Small descriptor for one chapter, listed in spine order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChapterRef {
    /// Position of the chapter within the spine.
    pub index: usize,
    /// The spine item's `idref`.
    pub idref: String,
    /// Manifest href, relative to the OPF.
    pub href: String,
    /// Media type declared in the manifest.
    pub media_type: String,
}
377
/// Persistable reading position: an anchor plus a fallback offset.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ReadingPosition {
    /// Zero-based index of the chapter in spine order.
    pub chapter_index: usize,
    /// Optional chapter href hint, so a restore can survive index shifts.
    pub chapter_href: Option<String>,
    /// Optional anchor payload (a fragment id or a CFI-like token).
    pub anchor: Option<String>,
    /// Character offset within the chapter, used when the anchor cannot be resolved.
    pub fallback_offset: usize,
}
390
391/// Semantic navigation primitive for seeking/resolve operations.
392#[derive(Clone, Debug, PartialEq, Eq)]
393pub enum Locator {
394    /// Resolve by chapter index.
395    Chapter(usize),
396    /// Resolve by chapter href (optionally with `#fragment`).
397    Href(String),
398    /// Resolve a fragment in the current chapter context.
399    Fragment(String),
400    /// Resolve by TOC id (mapped from nav href fragment or label).
401    TocId(String),
402    /// Resolve from a persisted reading position.
403    Position(ReadingPosition),
404}
405
406/// Fully resolved location information returned from locator APIs.
407#[derive(Clone, Debug, PartialEq, Eq)]
408pub struct ResolvedLocation {
409    /// Resolved chapter descriptor.
410    pub chapter: ChapterRef,
411    /// Optional resolved fragment (without leading '#').
412    pub fragment: Option<String>,
413    /// Canonical position payload for persistence.
414    pub position: ReadingPosition,
415}
416
417/// Lightweight mutable reading session detached from ZIP/file state.
418#[derive(Clone, Debug, PartialEq, Eq)]
419pub struct ReadingSession {
420    chapters: Vec<ChapterRef>,
421    navigation: Option<Navigation>,
422    current: ReadingPosition,
423}
424
425impl ReadingSession {
426    /// Create a reading session from chapter descriptors and optional navigation.
427    pub fn new(chapters: Vec<ChapterRef>, navigation: Option<Navigation>) -> Self {
428        let first_href = chapters.first().map(|c| c.href.clone());
429        Self {
430            chapters,
431            navigation,
432            current: ReadingPosition {
433                chapter_index: 0,
434                chapter_href: first_href,
435                anchor: None,
436                fallback_offset: 0,
437            },
438        }
439    }
440
441    /// Return current stable reading position.
442    pub fn current_position(&self) -> ReadingPosition {
443        self.current.clone()
444    }
445
446    /// Seek to an explicit reading position.
447    pub fn seek_position(&mut self, pos: &ReadingPosition) -> Result<(), EpubError> {
448        if pos.chapter_index >= self.chapters.len() {
449            return Err(EpubError::ChapterOutOfBounds {
450                index: pos.chapter_index,
451                chapter_count: self.chapters.len(),
452            });
453        }
454        self.current = pos.clone();
455        if self.current.chapter_href.is_none() {
456            self.current.chapter_href = Some(self.chapters[pos.chapter_index].href.clone());
457        }
458        Ok(())
459    }
460
461    /// Chapter-local progress ratio in `[0.0, 1.0]`.
462    pub fn chapter_progress(&self) -> f32 {
463        if self.chapters.is_empty() {
464            return 0.0;
465        }
466        if self.current.fallback_offset == 0 {
467            0.0
468        } else {
469            1.0
470        }
471    }
472
473    /// Whole-book progress ratio in `[0.0, 1.0]`.
474    pub fn book_progress(&self) -> f32 {
475        if self.chapters.is_empty() {
476            return 0.0;
477        }
478        let chapter_ratio = self.chapter_progress();
479        ((self.current.chapter_index as f32) + chapter_ratio) / (self.chapters.len() as f32)
480    }
481
482    /// Resolve a semantic locator to a concrete chapter/fragment location.
483    pub fn resolve_locator(&mut self, loc: Locator) -> Result<ResolvedLocation, EpubError> {
484        match loc {
485            Locator::Chapter(index) => {
486                let chapter =
487                    self.chapters
488                        .get(index)
489                        .cloned()
490                        .ok_or(EpubError::ChapterOutOfBounds {
491                            index,
492                            chapter_count: self.chapters.len(),
493                        })?;
494                self.current.chapter_index = index;
495                self.current.chapter_href = Some(chapter.href.clone());
496                self.current.anchor = None;
497                Ok(ResolvedLocation {
498                    chapter,
499                    fragment: None,
500                    position: self.current.clone(),
501                })
502            }
503            Locator::Href(href) => {
504                let (base, fragment) = split_href_fragment(&href);
505                let (index, chapter) = self
506                    .chapters
507                    .iter()
508                    .enumerate()
509                    .find(|(_, chapter)| chapter.href == base)
510                    .map(|(idx, chapter)| (idx, chapter.clone()))
511                    .ok_or_else(|| {
512                        EpubError::InvalidEpub(format!("unknown chapter href: {}", href))
513                    })?;
514                self.current.chapter_index = index;
515                self.current.chapter_href = Some(chapter.href.clone());
516                self.current.anchor = fragment.clone();
517                Ok(ResolvedLocation {
518                    chapter,
519                    fragment,
520                    position: self.current.clone(),
521                })
522            }
523            Locator::Fragment(fragment) => {
524                let idx = self
525                    .current
526                    .chapter_index
527                    .min(self.chapters.len().saturating_sub(1));
528                let chapter =
529                    self.chapters
530                        .get(idx)
531                        .cloned()
532                        .ok_or(EpubError::ChapterOutOfBounds {
533                            index: idx,
534                            chapter_count: self.chapters.len(),
535                        })?;
536                self.current.chapter_index = idx;
537                self.current.chapter_href = Some(chapter.href.clone());
538                self.current.anchor = Some(fragment.clone());
539                Ok(ResolvedLocation {
540                    chapter,
541                    fragment: Some(fragment),
542                    position: self.current.clone(),
543                })
544            }
545            Locator::TocId(id) => {
546                let nav = self.navigation.as_ref().ok_or_else(|| {
547                    EpubError::Navigation("no navigation document available".to_string())
548                })?;
549                let href = find_toc_href(nav, &id).ok_or_else(|| {
550                    EpubError::Navigation(format!("toc id/label not found: {}", id))
551                })?;
552                self.resolve_locator(Locator::Href(href))
553            }
554            Locator::Position(pos) => {
555                self.seek_position(&pos)?;
556                self.resolve_locator(Locator::Chapter(pos.chapter_index))
557            }
558        }
559    }
560}
561
/// Pagination cursor that carries parse/layout state from one page turn to the next.
///
/// Keeping the cursor lets page N+1 resume where page N stopped instead of
/// re-parsing the chapter from its beginning.
#[derive(Debug, Clone)]
pub struct PaginationSession {
    /// Index of the chapter being paginated.
    chapter_index: usize,
    /// Byte offset of the cursor within the chapter source.
    byte_offset: usize,
    /// Index of the event/token the cursor is at.
    event_index: usize,
    /// Open elements at the cursor.
    element_stack: Vec<String>,
    /// Number of the current page.
    page_number: usize,
    /// `true` once the chapter has been processed to its end.
    chapter_complete: bool,
}
581
582impl PaginationSession {
583    /// Create a new pagination session starting at chapter 0.
584    pub fn new() -> Self {
585        Self {
586            chapter_index: 0,
587            byte_offset: 0,
588            event_index: 0,
589            element_stack: Vec::with_capacity(32),
590            page_number: 0,
591            chapter_complete: false,
592        }
593    }
594
595    /// Create session for a specific chapter and offset.
596    pub fn at_position(chapter: usize, byte_offset: usize, event_index: usize) -> Self {
597        Self {
598            chapter_index: chapter,
599            byte_offset,
600            event_index,
601            element_stack: Vec::with_capacity(32),
602            page_number: 0,
603            chapter_complete: false,
604        }
605    }
606
607    /// Get current chapter index.
608    pub fn chapter_index(&self) -> usize {
609        self.chapter_index
610    }
611
612    /// Get current byte offset.
613    pub fn byte_offset(&self) -> usize {
614        self.byte_offset
615    }
616
617    /// Get current event index.
618    pub fn event_index(&self) -> usize {
619        self.event_index
620    }
621
622    /// Get current page number.
623    pub fn page_number(&self) -> usize {
624        self.page_number
625    }
626
627    /// Check if current chapter is fully processed.
628    pub fn is_chapter_complete(&self) -> bool {
629        self.chapter_complete
630    }
631
632    /// Advance to next page, preserving cursor state.
633    pub fn next_page(&mut self) {
634        self.page_number += 1;
635    }
636
637    /// Advance cursor position.
638    pub fn advance(&mut self, bytes: usize, events: usize) {
639        self.byte_offset += bytes;
640        self.event_index += events;
641    }
642
643    /// Push element onto stack.
644    pub fn push_element(&mut self, tag: &str) {
645        self.element_stack.push(tag.to_string());
646    }
647
648    /// Pop element from stack.
649    pub fn pop_element(&mut self) -> Option<String> {
650        self.element_stack.pop()
651    }
652
653    /// Move to next chapter.
654    pub fn next_chapter(&mut self) {
655        self.chapter_index += 1;
656        self.byte_offset = 0;
657        self.event_index = 0;
658        self.element_stack.clear();
659        self.chapter_complete = false;
660    }
661
662    /// Mark chapter as complete.
663    pub fn mark_chapter_complete(&mut self) {
664        self.chapter_complete = true;
665    }
666
667    /// Reset for new chapter.
668    pub fn reset_chapter(&mut self, chapter_index: usize) {
669        self.chapter_index = chapter_index;
670        self.byte_offset = 0;
671        self.event_index = 0;
672        self.element_stack.clear();
673        self.chapter_complete = false;
674    }
675
676    /// Create a reading position from current state.
677    pub fn to_position(&self) -> ReadingPosition {
678        ReadingPosition {
679            chapter_index: self.chapter_index,
680            chapter_href: None,
681            anchor: None,
682            fallback_offset: self.byte_offset,
683        }
684    }
685}
686
687impl Default for PaginationSession {
688    fn default() -> Self {
689        Self::new()
690    }
691}
692
/// Split `href` at its first `#` into the base part and the fragment.
///
/// Without a `#` the whole input is the base and the fragment is `None`.
/// Any later `#` characters stay inside the fragment.
fn split_href_fragment(href: &str) -> (String, Option<String>) {
    match href.find('#') {
        // `#` is one byte wide, so `hash + 1` is always a char boundary.
        Some(hash) => {
            let base = String::from(&href[..hash]);
            let fragment = String::from(&href[hash + 1..]);
            (base, Some(fragment))
        }
        None => (String::from(href), None),
    }
}
699
700fn find_toc_href(nav: &Navigation, id: &str) -> Option<String> {
701    fn visit(points: &[NavPoint], id: &str) -> Option<String> {
702        for point in points {
703            let (_, fragment) = split_href_fragment(&point.href);
704            if point.label == id || fragment.as_deref() == Some(id) {
705                return Some(point.href.clone());
706            }
707            if let Some(hit) = visit(&point.children, id) {
708                return Some(hit);
709            }
710        }
711        None
712    }
713    visit(&nav.toc, id)
714}
715
716impl EpubBook<File> {
717    /// Open an EPUB from disk and parse core structures.
718    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, EpubError> {
719        Self::open_with_options(path, EpubBookOptions::default())
720    }
721
722    /// Open an EPUB from disk with explicit options.
723    pub fn open_with_options<P: AsRef<Path>>(
724        path: P,
725        options: EpubBookOptions,
726    ) -> Result<Self, EpubError> {
727        let file = File::open(path).map_err(|e| EpubError::Io(e.to_string()))?;
728        Self::from_reader_with_options(file, options)
729    }
730
731    /// Open an EPUB from disk with compatibility open configuration.
732    pub fn open_with_config<P: AsRef<Path>>(
733        path: P,
734        config: OpenConfig,
735    ) -> Result<Self, EpubError> {
736        let file = File::open(path).map_err(|e| EpubError::Io(e.to_string()))?;
737        Self::from_reader_with_config(file, config)
738    }
739
740    /// Open an EPUB using SD card as extended memory (temp file backing).
741    ///
742    /// This is a memory-efficient alternative for embedded devices with limited RAM.
743    /// Instead of loading container.xml and OPF into RAM, it streams them to temp
744    /// files on the SD card and parses from there.
745    ///
746    /// # Memory behavior
747    /// - Peak RAM usage: ~4KB (ZIP directory) + small parsing buffers
748    /// - No large contiguous allocations for metadata files
749    /// - Temp files are cleaned up after parsing
750    ///
751    /// # Arguments
752    /// * `path` - Path to the EPUB file on SD card
753    /// * `temp_dir` - Directory for temporary files (should be on SD card)
754    /// * `config` - Open configuration (lazy_navigation recommended)
755    ///
756    /// # Example
757    /// ```no_run
758    /// use epub_stream::book::{EpubBook, OpenConfig, EpubBookOptions};
759    ///
760    /// let config = OpenConfig {
761    ///     options: EpubBookOptions::default(),
762    ///     lazy_navigation: true, // Defer TOC parsing to save RAM
763    /// };
764    /// let book = EpubBook::open_with_temp_storage(
765    ///     "/sd/books/mybook.epub",
766    ///     "/sd/.tmp",
767    ///     config
768    /// ).unwrap();
769    /// ```
770    #[cfg(feature = "std")]
771    pub fn open_with_temp_storage<EP: AsRef<Path>, TP: AsRef<Path>>(
772        epub_path: EP,
773        temp_dir: TP,
774        config: OpenConfig,
775    ) -> Result<Self, EpubError> {
776        use crate::metadata::{parse_container_xml_file, parse_opf_file};
777
778        let options = config.options;
779        let mut zip = StreamingZip::new_with_limits(
780            File::open(&epub_path).map_err(|e| EpubError::Io(e.to_string()))?,
781            options.zip_limits,
782        )
783        .map_err(EpubError::Zip)?;
784
785        zip.validate_mimetype().map_err(EpubError::Zip)?;
786
787        // Create temp file paths
788        let temp_dir = temp_dir.as_ref();
789        let container_temp = temp_dir.join(".epub_stream_container.xml");
790        let opf_temp = temp_dir.join(".epub_stream_opf.xml");
791
792        // Stream container.xml to temp file instead of loading into RAM
793        let mut container_file = File::create(&container_temp)
794            .map_err(|e| EpubError::Io(format!("Failed to create temp file: {}", e)))?;
795        read_entry_into(&mut zip, "META-INF/container.xml", &mut container_file)?;
796        drop(container_file);
797
798        // Parse container.xml from file to get OPF path
799        let opf_path = parse_container_xml_file(&container_temp)
800            .map_err(|e| EpubError::Parse(format!("Failed to parse container.xml: {}", e)))?;
801
802        // Clean up container temp file immediately
803        let _ = std::fs::remove_file(&container_temp);
804
805        // Get OPF entry info first (before we borrow zip mutably again)
806        let opf_entry = zip
807            .get_entry(&opf_path)
808            .ok_or(EpubError::Zip(ZipError::FileNotFound))?;
809
810        // Clone entry data we need (avoids borrow issues)
811        let opf_entry_data = CdEntry {
812            method: opf_entry.method,
813            compressed_size: opf_entry.compressed_size,
814            uncompressed_size: opf_entry.uncompressed_size,
815            local_header_offset: opf_entry.local_header_offset,
816            crc32: opf_entry.crc32,
817            filename: String::with_capacity(0),
818        };
819
820        // Stream OPF to temp file instead of loading into RAM
821        let mut opf_file = File::create(&opf_temp)
822            .map_err(|e| EpubError::Io(format!("Failed to create temp file: {}", e)))?;
823        zip.read_file_to_writer(&opf_entry_data, &mut opf_file)
824            .map_err(EpubError::Zip)?;
825        drop(opf_file);
826
827        // Parse OPF from file
828        let mut metadata = parse_opf_file(&opf_temp)
829            .map_err(|e| EpubError::Parse(format!("Failed to parse OPF: {}", e)))?;
830
831        // Store the OPF path in metadata
832        metadata.opf_path = Some(opf_path.clone());
833
834        // Parse spine from file to avoid full OPF buffering
835        let spine = crate::spine::parse_spine_file(&opf_temp)?;
836
837        // Clean up OPF temp file
838        let _ = std::fs::remove_file(&opf_temp);
839
840        validate_open_invariants(&metadata, &spine, options.validation_mode)?;
841
842        // Navigation is deferred if lazy_navigation is enabled
843        let (navigation, navigation_loaded) = if config.lazy_navigation {
844            (None, false)
845        } else {
846            (
847                parse_navigation(
848                    &mut zip,
849                    &metadata,
850                    &spine,
851                    &opf_path,
852                    options.validation_mode,
853                    options.max_nav_bytes,
854                )?,
855                true,
856            )
857        };
858
859        Ok(Self {
860            zip,
861            opf_path,
862            metadata,
863            spine,
864            validation_mode: options.validation_mode,
865            max_nav_bytes: options.max_nav_bytes,
866            navigation_loaded,
867            navigation,
868            embedded_fonts_cache: None,
869        })
870    }
871}
872
873impl<R: Read + Seek> EpubBook<R> {
874    /// Open an EPUB from any `Read + Seek` source and parse core structures.
875    ///
876    /// # Allocation behavior
877    /// - Bounded by `ZipLimits` in options
878    /// - Allocates central directory cache (~4KB fixed)
879    /// - Worst-case: ~10KB for metadata + navigation
880    pub fn from_reader(reader: R) -> Result<Self, EpubError> {
881        Self::from_reader_with_options(reader, EpubBookOptions::default())
882    }
883
884    /// Open an EPUB from any `Read + Seek` source and parse core structures.
885    ///
886    /// # Allocation behavior
887    /// - Bounded by `ZipLimits` in options
888    /// - Caller buffer required: No
889    /// - Worst-case memory: Configurable via `options.zip_limits`
890    pub fn from_reader_with_options(
891        reader: R,
892        options: EpubBookOptions,
893    ) -> Result<Self, EpubError> {
894        Self::from_reader_with_config(reader, OpenConfig::from(options))
895    }
896
897    /// Open an EPUB from any `Read + Seek` source with compatibility open configuration.
898    ///
899    /// # Allocation behavior
900    /// - Bounded by `ZipLimits` in config.options
901    /// - Supports lazy navigation loading to defer allocation
902    /// - Caller buffer required: No
903    pub fn from_reader_with_config(reader: R, config: OpenConfig) -> Result<Self, EpubError> {
904        let options = config.options;
905        let mut zip =
906            StreamingZip::new_with_limits(reader, options.zip_limits).map_err(EpubError::Zip)?;
907        zip.validate_mimetype().map_err(EpubError::Zip)?;
908
909        let container = read_entry(&mut zip, "META-INF/container.xml")?;
910        let opf_path = crate::metadata::parse_container_xml(&container)?;
911        let opf = read_entry(&mut zip, &opf_path)?;
912        let metadata = extract_metadata(&container, &opf)?;
913        let spine = crate::spine::parse_spine(&opf)?;
914        validate_open_invariants(&metadata, &spine, options.validation_mode)?;
915        let (navigation, navigation_loaded) = if config.lazy_navigation {
916            (None, false)
917        } else {
918            (
919                parse_navigation(
920                    &mut zip,
921                    &metadata,
922                    &spine,
923                    &opf_path,
924                    options.validation_mode,
925                    options.max_nav_bytes,
926                )?,
927                true,
928            )
929        };
930
931        Ok(Self {
932            zip,
933            opf_path,
934            metadata,
935            spine,
936            validation_mode: options.validation_mode,
937            max_nav_bytes: options.max_nav_bytes,
938            navigation_loaded,
939            navigation,
940            embedded_fonts_cache: None,
941        })
942    }
943
944    /// EPUB package metadata.
945    pub fn metadata(&self) -> &EpubMetadata {
946        &self.metadata
947    }
948
949    /// Convenience: metadata title.
950    pub fn title(&self) -> &str {
951        self.metadata.title.as_str()
952    }
953
954    /// Convenience: metadata author.
955    pub fn author(&self) -> &str {
956        self.metadata.author.as_str()
957    }
958
959    /// Convenience: metadata language.
960    pub fn language(&self) -> &str {
961        self.metadata.language.as_str()
962    }
963
964    /// Reading order from `<spine>`.
965    pub fn spine(&self) -> &Spine {
966        &self.spine
967    }
968
969    /// Parsed navigation document, when one is available.
970    pub fn navigation(&self) -> Option<&Navigation> {
971        self.navigation.as_ref()
972    }
973
974    /// Lazily parse and cache navigation data when not loaded yet.
975    pub fn ensure_navigation(&mut self) -> Result<Option<&Navigation>, EpubError> {
976        if !self.navigation_loaded {
977            self.navigation = parse_navigation(
978                &mut self.zip,
979                &self.metadata,
980                &self.spine,
981                &self.opf_path,
982                self.validation_mode,
983                self.max_nav_bytes,
984            )?;
985            self.navigation_loaded = true;
986        }
987        Ok(self.navigation.as_ref())
988    }
989
990    /// Convenience: top-level TOC entries from parsed navigation.
991    pub fn toc(&self) -> Option<&[NavPoint]> {
992        self.navigation.as_ref().map(|n| n.toc.as_slice())
993    }
994
995    /// Number of entries in the spine reading order.
996    pub fn chapter_count(&self) -> usize {
997        self.spine.len()
998    }
999
1000    /// Return the uncompressed byte size for a chapter entry.
1001    ///
1002    /// This enables embedded callers to pre-size reusable chapter buffers
1003    /// before invoking chapter streaming/render flows.
1004    pub fn chapter_uncompressed_size(&mut self, index: usize) -> Result<usize, EpubError> {
1005        let chapter = self.chapter(index)?;
1006        let zip_path = resolve_opf_relative_path(&self.opf_path, &chapter.href);
1007        let entry = self
1008            .zip
1009            .get_entry(&zip_path)
1010            .ok_or(EpubError::Zip(ZipError::FileNotFound))?;
1011        usize::try_from(entry.uncompressed_size).map_err(|_| EpubError::Zip(ZipError::FileTooLarge))
1012    }
1013
1014    /// Create a detached reading session for locator/progress operations.
1015    pub fn reading_session(&self) -> ReadingSession {
1016        ReadingSession::new(self.chapters().collect(), self.navigation.clone())
1017    }
1018
1019    /// Enumerate chapters in spine order.
1020    pub fn chapters(&self) -> impl Iterator<Item = ChapterRef> + '_ {
1021        self.spine
1022            .items()
1023            .iter()
1024            .enumerate()
1025            .filter_map(|(index, spine_item)| {
1026                self.metadata
1027                    .get_item(&spine_item.idref)
1028                    .map(|manifest_item| ChapterRef {
1029                        index,
1030                        idref: spine_item.idref.clone(),
1031                        href: manifest_item.href.clone(),
1032                        media_type: manifest_item.media_type.clone(),
1033                    })
1034            })
1035    }
1036
1037    /// Get a chapter descriptor by spine index.
1038    pub fn chapter(&self, index: usize) -> Result<ChapterRef, EpubError> {
1039        let spine_item = self
1040            .spine
1041            .get_item(index)
1042            .ok_or(EpubError::ChapterOutOfBounds {
1043                index,
1044                chapter_count: self.spine.len(),
1045            })?;
1046
1047        let manifest_item = self.metadata.get_item(&spine_item.idref).ok_or_else(|| {
1048            EpubError::ManifestItemMissing {
1049                idref: spine_item.idref.clone(),
1050            }
1051        })?;
1052
1053        Ok(ChapterRef {
1054            index,
1055            idref: spine_item.idref.clone(),
1056            href: manifest_item.href.clone(),
1057            media_type: manifest_item.media_type.clone(),
1058        })
1059    }
1060
1061    /// Get a chapter descriptor by spine `idref`.
1062    pub fn chapter_by_id(&self, idref: &str) -> Result<ChapterRef, EpubError> {
1063        let index = self
1064            .spine
1065            .items()
1066            .iter()
1067            .position(|item| item.idref == idref)
1068            .ok_or_else(|| EpubError::ManifestItemMissing {
1069                idref: idref.to_string(),
1070            })?;
1071        self.chapter(index)
1072    }
1073
1074    /// Read a resource by OPF-relative href into a new `Vec<u8>`.
1075    ///
1076    /// Fragment suffixes (e.g. `chapter.xhtml#p3`) are ignored.
1077    ///
1078    /// # Allocation behavior
1079    /// - **Allocates**: Returns new `Vec<u8>`
1080    /// - **Non-embedded-fast-path**: Use `read_resource_into` for embedded
1081    /// - Caller buffer required: No
1082    /// - Worst-case memory: Unbounded (depends on file size)
1083    ///
1084    /// For bounded allocation, use `read_resource_into_with_limit`.
1085    pub fn read_resource(&mut self, href: &str) -> Result<Vec<u8>, EpubError> {
1086        let mut out = Vec::with_capacity(0);
1087        self.read_resource_into(href, &mut out)?;
1088        Ok(out)
1089    }
1090
1091    /// Stream a resource by OPF-relative href into a writer.
1092    ///
1093    /// Fragment suffixes (e.g. `chapter.xhtml#p3`) are ignored.
1094    ///
1095    /// # Allocation behavior
1096    /// - **Zero hidden allocations**: Uses bounded internal buffers
1097    /// - Caller buffer required: Yes (writer handles output)
1098    /// - **Preferred for embedded**: Streaming API
1099    pub fn read_resource_into<W: Write>(
1100        &mut self,
1101        href: &str,
1102        writer: &mut W,
1103    ) -> Result<usize, EpubError> {
1104        self.read_resource_into_with_hard_cap(href, writer, usize::MAX)
1105    }
1106
1107    /// Stream a resource by OPF-relative href into a writer with an explicit cap.
1108    ///
1109    /// Fragment suffixes (e.g. `chapter.xhtml#p3`) are ignored.
1110    pub fn read_resource_into_with_limit<W: Write>(
1111        &mut self,
1112        href: &str,
1113        writer: &mut W,
1114        max_bytes: usize,
1115    ) -> Result<usize, EpubError> {
1116        self.read_resource_into_with_hard_cap(href, writer, max_bytes)
1117    }
1118
1119    /// Stream a resource by OPF-relative href with a hard cap.
1120    ///
1121    /// Fragment suffixes (e.g. `chapter.xhtml#p3`) are ignored.
1122    pub fn read_resource_into_with_hard_cap<W: Write>(
1123        &mut self,
1124        href: &str,
1125        writer: &mut W,
1126        hard_cap_bytes: usize,
1127    ) -> Result<usize, EpubError> {
1128        let zip_path = resolve_opf_relative_path(&self.opf_path, href);
1129        read_entry_into_with_limit(&mut self.zip, &zip_path, writer, hard_cap_bytes)
1130    }
1131
1132    /// Read spine item content bytes by index.
1133    pub fn read_spine_item_bytes(&mut self, index: usize) -> Result<Vec<u8>, EpubError> {
1134        let href = self.chapter(index)?.href;
1135
1136        self.read_resource(&href)
1137    }
1138
1139    /// Read an image resource with explicit media/size constraints.
1140    ///
1141    /// This is a bounded convenience API for consumers that need image bytes
1142    /// (cover extraction, inline-image prefetch, etc.) while keeping policy
1143    /// and limits in `epub-stream` rather than downstream crates.
1144    pub fn read_image_resource_into_with_options(
1145        &mut self,
1146        href: &str,
1147        out: &mut Vec<u8>,
1148        options: ImageReadOptions,
1149    ) -> Result<usize, EpubError> {
1150        let zip_path = resolve_opf_relative_path(&self.opf_path, href);
1151        let media_type = self
1152            .manifest_item_by_zip_path(&zip_path)
1153            .map(|item| item.media_type.as_str());
1154        if !is_supported_image_resource(media_type, &zip_path, options) {
1155            return Err(EpubError::Parse(format!(
1156                "resource is not an allowed image: {}",
1157                href
1158            )));
1159        }
1160        read_entry_into_with_limit(&mut self.zip, &zip_path, out, options.max_bytes)
1161    }
1162
1163    /// Resolve cover image metadata using manifest/guide/XHTML hints.
1164    pub fn cover_image_ref(&mut self) -> Result<Option<CoverImageRef>, EpubError> {
1165        self.cover_image_ref_with_options(CoverImageOptions::default())
1166    }
1167
1168    /// Resolve cover image metadata using explicit options.
1169    pub fn cover_image_ref_with_options(
1170        &mut self,
1171        options: CoverImageOptions,
1172    ) -> Result<Option<CoverImageRef>, EpubError> {
1173        #[derive(Clone)]
1174        struct Candidate {
1175            href: String,
1176            media_type: Option<String>,
1177            source: CoverImageSource,
1178        }
1179
1180        let mut candidates: Vec<Candidate> = Vec::with_capacity(4);
1181        if let Some(item) = self.metadata.get_cover_item() {
1182            candidates.push(Candidate {
1183                href: item.href.clone(),
1184                media_type: Some(item.media_type.clone()),
1185                source: CoverImageSource::Manifest,
1186            });
1187        }
1188        if options.include_guide_refs {
1189            for guide_ref in &self.metadata.guide {
1190                if !guide_ref.guide_type.eq_ignore_ascii_case("cover") {
1191                    continue;
1192                }
1193                if candidates
1194                    .iter()
1195                    .any(|existing| existing.href == guide_ref.href)
1196                {
1197                    continue;
1198                }
1199                candidates.push(Candidate {
1200                    href: guide_ref.href.clone(),
1201                    media_type: None,
1202                    source: CoverImageSource::Guide,
1203                });
1204            }
1205        }
1206        if !options.prefer_manifest_cover && !candidates.is_empty() {
1207            candidates.rotate_left(1);
1208        }
1209
1210        let mut doc_buf = Vec::with_capacity(0);
1211        for candidate in candidates {
1212            let zip_path = resolve_opf_relative_path(&self.opf_path, &candidate.href);
1213            if is_supported_image_resource(
1214                candidate.media_type.as_deref(),
1215                &zip_path,
1216                options.image,
1217            ) {
1218                let media_type =
1219                    normalized_image_media_type(candidate.media_type.as_deref(), &zip_path);
1220                return Ok(Some(CoverImageRef {
1221                    zip_path,
1222                    href: candidate.href,
1223                    media_type,
1224                    source: candidate.source,
1225                }));
1226            }
1227            if !options.parse_cover_documents
1228                || !is_cover_document(candidate.media_type.as_deref(), &zip_path)
1229            {
1230                continue;
1231            }
1232            doc_buf.clear();
1233            if read_entry_into_with_limit(
1234                &mut self.zip,
1235                &zip_path,
1236                &mut doc_buf,
1237                options.max_cover_document_bytes,
1238            )
1239            .is_err()
1240            {
1241                continue;
1242            }
1243            let Some(nested_href) =
1244                crate::metadata::extract_cover_image_href_from_xhtml(doc_buf.as_slice())
1245            else {
1246                continue;
1247            };
1248            let nested_zip_path = resolve_opf_relative_path(&zip_path, &nested_href);
1249            let nested_manifest_item = self.manifest_item_by_zip_path(&nested_zip_path);
1250            let nested_media = nested_manifest_item.map(|item| item.media_type.as_str());
1251            if !is_supported_image_resource(nested_media, &nested_zip_path, options.image) {
1252                continue;
1253            }
1254            let media_type = normalized_image_media_type(nested_media, &nested_zip_path);
1255            let href = nested_manifest_item
1256                .map(|item| item.href.clone())
1257                .unwrap_or_else(|| nested_zip_path.clone());
1258            return Ok(Some(CoverImageRef {
1259                zip_path: nested_zip_path,
1260                href,
1261                media_type,
1262                source: CoverImageSource::CoverDocument,
1263            }));
1264        }
1265
1266        Ok(None)
1267    }
1268
1269    /// Read resolved cover image bytes into caller-owned buffer.
1270    ///
1271    /// Returns `Ok(None)` when no supported cover image could be resolved.
1272    pub fn read_cover_image_into_with_options(
1273        &mut self,
1274        out: &mut Vec<u8>,
1275        options: CoverImageOptions,
1276    ) -> Result<Option<CoverImageRef>, EpubError> {
1277        let Some(cover) = self.cover_image_ref_with_options(options)? else {
1278            return Ok(None);
1279        };
1280        out.clear();
1281        read_entry_into_with_limit(&mut self.zip, &cover.zip_path, out, options.image.max_bytes)?;
1282        Ok(Some(cover))
1283    }
1284
1285    /// Read resolved cover image bytes with default options.
1286    pub fn read_cover_image_into(
1287        &mut self,
1288        out: &mut Vec<u8>,
1289    ) -> Result<Option<CoverImageRef>, EpubError> {
1290        self.read_cover_image_into_with_options(out, CoverImageOptions::default())
1291    }
1292
1293    fn manifest_item_by_zip_path(&self, zip_path: &str) -> Option<&crate::metadata::ManifestItem> {
1294        self.metadata.manifest.iter().find(|item| {
1295            let item_zip_path = resolve_opf_relative_path(&self.opf_path, &item.href);
1296            item_zip_path == zip_path
1297        })
1298    }
1299
1300    /// Read a spine chapter as UTF-8 HTML/XHTML text by index.
1301    ///
1302    /// # Allocation behavior
1303    /// - **Allocates**: Returns new `String`
1304    /// - **Non-embedded-fast-path**: Use `chapter_html_into` for embedded
1305    /// - Caller buffer required: No
1306    /// - Worst-case memory: Depends on chapter size
1307    ///
1308    /// For bounded allocation, use `chapter_html_into_with_limit`.
1309    pub fn chapter_html(&mut self, index: usize) -> Result<String, EpubError> {
1310        let mut out = String::with_capacity(0);
1311        self.chapter_html_into(index, &mut out)?;
1312        Ok(out)
1313    }
1314
1315    /// Read a spine chapter as UTF-8 HTML/XHTML text into caller-provided output.
1316    ///
1317    /// # Allocation behavior
1318    /// - **Zero hidden allocations**: Reuses caller's String buffer
1319    /// - Caller buffer required: Yes
1320    /// - **Preferred for embedded**: Buffer reuse API
1321    pub fn chapter_html_into(&mut self, index: usize, out: &mut String) -> Result<(), EpubError> {
1322        self.chapter_html_into_with_limit(index, usize::MAX, out)
1323    }
1324
1325    /// Read a spine chapter as UTF-8 HTML/XHTML text with a hard byte cap into caller output.
1326    pub fn chapter_html_into_with_limit(
1327        &mut self,
1328        index: usize,
1329        max_bytes: usize,
1330        out: &mut String,
1331    ) -> Result<(), EpubError> {
1332        out.clear();
1333        let chapter = self.chapter(index)?;
1334        let mut bytes = Vec::with_capacity(0);
1335        self.read_resource_into_with_hard_cap(&chapter.href, &mut bytes, max_bytes)?;
1336        let mut html = String::from_utf8(bytes)
1337            .map_err(|_| EpubError::ChapterNotUtf8 { href: chapter.href })?;
1338        core::mem::swap(out, &mut html);
1339        Ok(())
1340    }
1341
1342    /// Resolve chapter stylesheet sources in cascade order.
1343    pub fn chapter_stylesheets(&mut self, index: usize) -> Result<ChapterStylesheets, EpubError> {
1344        self.chapter_stylesheets_with_options(index, StyleLimits::default())
1345    }
1346
1347    /// Resolve chapter stylesheet sources in cascade order with explicit limits.
1348    pub fn chapter_stylesheets_with_options(
1349        &mut self,
1350        index: usize,
1351        limits: StyleLimits,
1352    ) -> Result<ChapterStylesheets, EpubError> {
1353        let chapter = self.chapter(index)?;
1354        let html = self.chapter_html(index)?;
1355        let links = parse_stylesheet_links(&chapter.href, &html);
1356        let mut sources = Vec::with_capacity(0);
1357
1358        for href in links {
1359            let bytes = self.read_resource(&href)?;
1360            if bytes.len() > limits.max_css_bytes {
1361                return Err(EpubError::Parse(format!(
1362                    "Stylesheet exceeds max_css_bytes ({} > {}) at '{}'",
1363                    bytes.len(),
1364                    limits.max_css_bytes,
1365                    href
1366                )));
1367            }
1368            let css = String::from_utf8(bytes)
1369                .map_err(|_| EpubError::Parse(format!("Stylesheet is not UTF-8: {}", href)))?;
1370            sources.push(StylesheetSource { href, css });
1371        }
1372
1373        Ok(ChapterStylesheets { sources })
1374    }
1375
1376    /// Backward-compatible alias for chapter stylesheet discovery with explicit limits.
1377    pub fn styles_for_chapter(
1378        &mut self,
1379        index: usize,
1380        limits: StyleLimits,
1381    ) -> Result<ChapterStylesheets, EpubError> {
1382        self.chapter_stylesheets_with_options(index, limits)
1383    }
1384
1385    /// Resolve chapter stylesheet sources with caller-provided buffers.
1386    ///
1387    /// This variant reuses the provided buffer for each stylesheet read, avoiding
1388    /// repeated allocations in the hot path.
1389    ///
1390    /// # Allocation behavior
1391    /// - **Zero per-stylesheet allocations**: Reuses caller-provided buffer
1392    /// - Caller buffer required: Yes (scratch_buf for I/O)
1393    /// - **Preferred for embedded**: Avoids allocation per stylesheet
1394    pub fn chapter_stylesheets_with_scratch(
1395        &mut self,
1396        index: usize,
1397        limits: StyleLimits,
1398        scratch_buf: &mut Vec<u8>,
1399    ) -> Result<ChapterStylesheets, EpubError> {
1400        let chapter = self.chapter(index)?;
1401        let html = self.chapter_html(index)?;
1402        let links = parse_stylesheet_links(&chapter.href, &html);
1403        let mut sources = Vec::with_capacity(links.len());
1404
1405        for href in links {
1406            scratch_buf.clear();
1407            self.read_resource_into(&href, scratch_buf)
1408                .map_err(|_| EpubError::Zip(ZipError::FileNotFound))?;
1409
1410            if scratch_buf.len() > limits.max_css_bytes {
1411                return Err(EpubError::LimitExceeded {
1412                    kind: LimitKind::CssSize,
1413                    actual: scratch_buf.len(),
1414                    limit: limits.max_css_bytes,
1415                    path: Some(href.clone()),
1416                });
1417            }
1418
1419            let css = String::from_utf8(scratch_buf.clone())
1420                .map_err(|_| EpubError::ChapterNotUtf8 { href: href.clone() })?;
1421
1422            sources.push(StylesheetSource { href, css });
1423        }
1424
1425        Ok(ChapterStylesheets { sources })
1426    }
1427
1428    /// Enumerate embedded font-face metadata from EPUB CSS resources.
1429    pub fn embedded_fonts(&mut self) -> Result<Vec<EmbeddedFontFace>, EpubError> {
1430        self.embedded_fonts_with_limits(FontLimits::default())
1431    }
1432
1433    /// Enumerate embedded font-face metadata with explicit limits.
1434    pub fn embedded_fonts_with_options(
1435        &mut self,
1436        limits: FontLimits,
1437    ) -> Result<Vec<EmbeddedFontFace>, EpubError> {
1438        self.embedded_fonts_with_limits(limits)
1439    }
1440
1441    /// Enumerate embedded font-face metadata with explicit limits.
1442    ///
1443    /// This path lazily scans CSS once and reuses cached face metadata on subsequent calls.
1444    pub fn embedded_fonts_with_limits(
1445        &mut self,
1446        limits: FontLimits,
1447    ) -> Result<Vec<EmbeddedFontFace>, EpubError> {
1448        let faces = self.ensure_embedded_fonts_loaded()?;
1449        if faces.len() > limits.max_faces {
1450            return Err(EpubError::LimitExceeded {
1451                kind: LimitKind::FontLimit,
1452                actual: faces.len(),
1453                limit: limits.max_faces,
1454                path: None,
1455            });
1456        }
1457        Ok(faces.clone())
1458    }
1459
1460    /// Enumerate embedded font-face metadata with caller-provided buffer.
1461    ///
1462    /// This variant scans CSS files without caching, using a caller-provided buffer
1463    /// to avoid allocations. Suitable for one-time font enumeration in constrained
1464    /// environments where caching is not desired.
1465    ///
1466    /// # Allocation behavior
1467    /// - **Zero per-CSS allocations**: Reuses caller-provided buffer
1468    /// - Caller buffer required: Yes (scratch_buf for I/O)
1469    /// - No caching: Always reads from archive
1470    pub fn embedded_fonts_with_scratch(
1471        &mut self,
1472        limits: FontLimits,
1473        scratch_buf: &mut Vec<u8>,
1474    ) -> Result<Vec<EmbeddedFontFace>, EpubError> {
1475        let css_hrefs: Vec<String> = self
1476            .metadata
1477            .manifest
1478            .iter()
1479            .filter(|item| item.media_type == "text/css")
1480            .map(|item| item.href.clone())
1481            .collect();
1482
1483        let mut out = Vec::with_capacity(limits.max_faces.min(16));
1484
1485        for href in css_hrefs {
1486            if out.len() >= limits.max_faces {
1487                break;
1488            }
1489
1490            scratch_buf.clear();
1491            match self.read_resource_into(&href, scratch_buf) {
1492                Ok(_) => {}
1493                Err(_) => continue, // Skip missing CSS files
1494            }
1495
1496            if scratch_buf.len() > limits.max_bytes_per_font {
1497                continue; // Skip oversized CSS
1498            }
1499
1500            let css = match String::from_utf8(scratch_buf.clone()) {
1501                Ok(s) => s,
1502                Err(_) => continue, // Skip non-UTF8 CSS
1503            };
1504
1505            let faces = parse_font_faces_from_css(&href, &css);
1506            for face in faces {
1507                if out.len() >= limits.max_faces {
1508                    break;
1509                }
1510                out.push(face);
1511            }
1512        }
1513
1514        Ok(out)
1515    }
1516
1517    /// Style chapter content into an event/run stream with default options.
1518    ///
1519    /// # Allocation behavior
1520    /// - **Allocates**: Returns `StyledChapter` with internal Vec
1521    /// - **Non-embedded-fast-path**: Use `chapter_events` for streaming
1522    /// - Caller buffer required: No
1523    /// - Worst-case memory: Depends on `MemoryBudget` in options
1524    pub fn chapter_styled_runs(&mut self, index: usize) -> Result<StyledChapter, EpubError> {
1525        self.chapter_styled_runs_with_options(index, RenderPrepOptions::default())
1526    }
1527
1528    /// Style chapter content into an event/run stream with explicit options.
1529    ///
1530    /// # Allocation behavior
1531    /// - **Bounded by limits**: Respects `MemoryBudget` in options
1532    /// - Caller buffer required: No
1533    /// - Worst-case memory: Configurable via `options.memory`
1534    pub fn chapter_styled_runs_with_options(
1535        &mut self,
1536        index: usize,
1537        options: RenderPrepOptions,
1538    ) -> Result<StyledChapter, EpubError> {
1539        let mut prep = RenderPrep::new(options).with_serif_default();
1540        let prepared = prep.prepare_chapter(self, index).map_err(EpubError::from)?;
1541        let mut items = Vec::with_capacity(0);
1542        for item in prepared.iter() {
1543            items.push(item.clone());
1544        }
1545        Ok(StyledChapter::from_items(items))
1546    }
1547
1548    /// Stream chapter style events/runs via callback with bounded item emission.
1549    ///
1550    /// # Allocation behavior
1551    /// - **Zero hidden allocations**: Uses bounded internal buffers
1552    /// - Caller buffer required: No (callback receives items)
1553    /// - **Preferred for embedded**: Streaming API with item caps
1554    /// - Worst-case memory: Bounded by `opts.render.memory`
1555    pub fn chapter_events<F>(
1556        &mut self,
1557        index: usize,
1558        opts: ChapterEventsOptions,
1559        mut on_item: F,
1560    ) -> Result<usize, EpubError>
1561    where
1562        F: FnMut(StyledEventOrRun) -> Result<(), EpubError>,
1563    {
1564        let mut prep = RenderPrep::new(opts.render).with_serif_default();
1565        let mut emitted = 0usize;
1566        let mut callback_error: Option<EpubError> = None;
1567        let mut hit_cap = false;
1568
1569        prep.prepare_chapter_with(self, index, |item| {
1570            if callback_error.is_some() || hit_cap {
1571                return;
1572            }
1573            if emitted >= opts.max_items {
1574                hit_cap = true;
1575                return;
1576            }
1577            if let Err(err) = on_item(item) {
1578                callback_error = Some(err);
1579                return;
1580            }
1581            emitted += 1;
1582        })
1583        .map_err(EpubError::from)?;
1584
1585        if let Some(err) = callback_error {
1586            return Err(err);
1587        }
1588        if hit_cap {
1589            // TODO: RenderPrep callbacks cannot currently short-circuit parsing.
1590            // This cap bounds emitted output, but upstream tokenization keeps scanning.
1591            return Err(EpubError::Parse(format!(
1592                "Chapter event count exceeded max_items ({})",
1593                opts.max_items
1594            )));
1595        }
1596        Ok(emitted)
1597    }
1598
1599    /// Stream chapter events with caller-provided scratch buffers.
1600    ///
1601    /// This is the most memory-efficient API for processing chapter content. It uses
1602    /// caller-provided buffers for all internal operations, avoiding any hidden allocations
1603    /// in the hot path.
1604    ///
1605    /// # Allocation behavior
1606    /// - **Zero hidden allocations**: Uses only caller-provided buffers
1607    /// - Caller buffer required: Yes (chapter_buf, token_buf, scratch)
1608    /// - **Preferred for embedded**: True zero-allocation streaming path
1609    /// - Worst-case memory: Bounded by provided buffer sizes
1610    ///
1611    /// # Errors
1612    /// Returns `EpubError::BufferTooSmall` if provided buffers are insufficient.
1613    /// Returns `EpubError::LimitExceeded` if hard caps are reached.
1614    #[inline(never)]
1615    pub fn chapter_events_with_scratch<F>(
1616        &mut self,
1617        index: usize,
1618        opts: ChapterEventsOptions,
1619        chapter_buf: &mut Vec<u8>,
1620        scratch: &mut crate::streaming::ScratchBuffers,
1621        mut on_item: F,
1622    ) -> Result<ChapterStreamResult, EpubError>
1623    where
1624        F: FnMut(StyledEventOrRun) -> Result<(), EpubError>,
1625    {
1626        use crate::zip::CdEntry;
1627
1628        // Clear buffers for reuse
1629        chapter_buf.clear();
1630        scratch.clear();
1631
1632        let chapter = self.chapter(index)?;
1633        let href = chapter.href;
1634        let zip_path = resolve_opf_relative_path(&self.opf_path, &href);
1635
1636        // Get ZIP entry
1637        let entry = self
1638            .zip
1639            .get_entry(&zip_path)
1640            .ok_or(EpubError::Zip(ZipError::FileNotFound))?
1641            .clone();
1642
1643        // Check hard caps before reading
1644        let uncompressed = usize::try_from(entry.uncompressed_size)
1645            .map_err(|_| EpubError::Zip(ZipError::FileTooLarge))?;
1646
1647        // Check ZIP limits
1648        if let Some(limits) = self.zip.limits() {
1649            if uncompressed > limits.max_file_read_size {
1650                return Err(EpubError::LimitExceeded {
1651                    kind: LimitKind::FileSize,
1652                    actual: uncompressed,
1653                    limit: limits.max_file_read_size,
1654                    path: Some(zip_path),
1655                });
1656            }
1657        }
1658
1659        // Check memory budget
1660        if uncompressed > opts.render.memory.max_entry_bytes {
1661            return Err(EpubError::LimitExceeded {
1662                kind: LimitKind::MemoryBudget,
1663                actual: uncompressed,
1664                limit: opts.render.memory.max_entry_bytes,
1665                path: Some(zip_path),
1666            });
1667        }
1668
1669        // Check if chapter fits in provided buffer
1670        if uncompressed > chapter_buf.capacity() {
1671            return Err(EpubError::BufferTooSmall {
1672                required: uncompressed,
1673                provided: chapter_buf.capacity(),
1674                context: "chapter_buf".to_string(),
1675            });
1676        }
1677
1678        // Read chapter into caller-provided buffer using scratch for I/O
1679        let use_entry = CdEntry {
1680            filename: String::with_capacity(0),
1681            method: entry.method,
1682            compressed_size: entry.compressed_size,
1683            uncompressed_size: entry.uncompressed_size,
1684            local_header_offset: entry.local_header_offset,
1685            crc32: entry.crc32,
1686        };
1687
1688        // `ZipArchive::read_file_with_scratch` consumes `&mut [u8]` and therefore
1689        // uses slice length (not Vec capacity) as the writable output window.
1690        // Ensure the Vec length matches the entry size before decompression while
1691        // staying within the pre-validated capacity budget.
1692        if chapter_buf.len() < uncompressed {
1693            chapter_buf.resize(uncompressed, 0);
1694        }
1695        if scratch.read_buf.is_empty() {
1696            // `read_file_with_scratch` requires a non-empty input slice and uses
1697            // slice length as the compressed read chunk size.
1698            let read_chunk = scratch.read_buf.capacity().max(1024);
1699            scratch.read_buf.resize(read_chunk, 0);
1700        }
1701        let bytes_read = self
1702            .zip
1703            .read_file_with_scratch(
1704                &use_entry,
1705                chapter_buf.as_mut_slice(),
1706                &mut scratch.read_buf,
1707            )
1708            .map_err(EpubError::Zip)?;
1709        chapter_buf.truncate(bytes_read);
1710
1711        let mut emitted = 0usize;
1712        let mut callback_err: Option<EpubError> = None;
1713        let mut prep = RenderPrep::new(opts.render).with_serif_default();
1714        prep.prepare_chapter_bytes_with_scratch(
1715            self,
1716            index,
1717            chapter_buf,
1718            &mut scratch.read_buf,
1719            |item| {
1720                if callback_err.is_some() || emitted >= opts.max_items {
1721                    return;
1722                }
1723                if let Err(e) = on_item(item) {
1724                    callback_err = Some(e);
1725                    return;
1726                }
1727                emitted += 1;
1728            },
1729        )
1730        .map_err(EpubError::from)?;
1731
1732        if let Some(err) = callback_err {
1733            return Err(err);
1734        }
1735        if emitted >= opts.max_items {
1736            return Err(EpubError::LimitExceeded {
1737                kind: LimitKind::EventCount,
1738                actual: emitted,
1739                limit: opts.max_items,
1740                path: Some(zip_path),
1741            });
1742        }
1743
1744        Ok(ChapterStreamResult {
1745            items_emitted: emitted,
1746            bytes_read: chapter_buf.len(),
1747            complete: true,
1748        })
1749    }
1750
1751    /// Read a chapter and return plain text extracted from token stream.
1752    ///
1753    /// # Allocation behavior
1754    /// - **Allocates**: Returns new `String`
1755    /// - **Non-embedded-fast-path**: Use `chapter_text_into` for embedded
1756    /// - Caller buffer required: No
1757    /// - Worst-case memory: Depends on chapter text size
1758    ///
1759    /// For lower memory usage, prefer `chapter_text_into`/`chapter_text_with_limit`.
1760    pub fn chapter_text(&mut self, index: usize) -> Result<String, EpubError> {
1761        let mut out = String::with_capacity(0);
1762        self.chapter_text_into(index, &mut out)?;
1763        Ok(out)
1764    }
1765
    /// Extract plain text for a chapter into a caller-provided string buffer.
    ///
    /// This avoids allocating an intermediate `Vec<Token>` and is intended as
    /// the default API for constrained environments.
    ///
    /// Delegates to `chapter_text_into_with_limit` with `usize::MAX` as the byte
    /// cap, i.e. the output is effectively uncapped. Because the delegate clears
    /// `out` first, any previous content of `out` is discarded.
    ///
    /// # Allocation behavior
    /// - Output reuses the caller's `String` buffer (it may still grow to fit the text)
    /// - Caller buffer required: Yes
    /// - **Preferred for embedded**: Primary text extraction API
    /// - NOTE(review): the delegate loads the chapter through `read_resource`,
    ///   which appears to return an owned byte buffer, so "zero hidden
    ///   allocations" holds for the output only — confirm before relying on it.
    ///
    /// # Errors
    /// Propagates any `EpubError` from `chapter_text_into_with_limit`.
    pub fn chapter_text_into(&mut self, index: usize, out: &mut String) -> Result<(), EpubError> {
        self.chapter_text_into_with_limit(index, usize::MAX, out)
    }
1778
1779    /// Extract plain text for a chapter and cap output to `max_bytes`.
1780    ///
1781    /// Output is truncated on a UTF-8 boundary when the limit is reached.
1782    pub fn chapter_text_with_limit(
1783        &mut self,
1784        index: usize,
1785        max_bytes: usize,
1786    ) -> Result<String, EpubError> {
1787        let mut out = String::with_capacity(0);
1788        self.chapter_text_into_with_limit(index, max_bytes, &mut out)?;
1789        Ok(out)
1790    }
1791
1792    /// Extract plain text into caller-provided storage, with a hard byte cap.
1793    ///
1794    /// Existing content of `out` is cleared before writing.
1795    pub fn chapter_text_into_with_limit(
1796        &mut self,
1797        index: usize,
1798        max_bytes: usize,
1799        out: &mut String,
1800    ) -> Result<(), EpubError> {
1801        out.clear();
1802        if max_bytes == 0 {
1803            return Ok(());
1804        }
1805
1806        let chapter = self.chapter(index)?;
1807        let bytes = self.read_resource(&chapter.href)?;
1808        extract_plain_text_limited(&bytes, max_bytes, out)
1809    }
1810
1811    /// Tokenize spine item content by index.
1812    ///
1813    /// # Allocation behavior
1814    /// - **Allocates**: Returns `Vec<Token>` (unbounded growth possible)
1815    /// - **Non-embedded-fast-path**: Use `chapter_text_into` for bounded paths
1816    /// - Caller buffer required: No
1817    /// - Worst-case memory: Unbounded (depends on chapter complexity)
1818    ///
1819    /// Prefer `chapter_text_into` for low-memory extraction paths.
1820    /// For bounded tokenization, use `tokenize_html_limited` from the tokenizer module.
1821    pub fn tokenize_spine_item(&mut self, index: usize) -> Result<Vec<Token>, EpubError> {
1822        let chapter = self.chapter(index)?;
1823        let bytes = self.read_resource(&chapter.href)?;
1824        let html =
1825            str::from_utf8(&bytes).map_err(|_| EpubError::ChapterNotUtf8 { href: chapter.href })?;
1826        tokenize_html(html).map_err(EpubError::from)
1827    }
1828
    /// Backward-compatible alias for `read_spine_item_bytes`.
    ///
    /// Forwards `index` unchanged and returns that method's result as-is,
    /// including any error.
    pub fn read_spine_chapter(&mut self, index: usize) -> Result<Vec<u8>, EpubError> {
        self.read_spine_item_bytes(index)
    }
1833
    /// Backward-compatible alias for `tokenize_spine_item`.
    ///
    /// Forwards `index` unchanged and returns that method's result as-is,
    /// including any error.
    pub fn tokenize_spine_chapter(&mut self, index: usize) -> Result<Vec<Token>, EpubError> {
        self.tokenize_spine_item(index)
    }
1838
1839    fn ensure_embedded_fonts_loaded(&mut self) -> Result<&Vec<EmbeddedFontFace>, EpubError> {
1840        if self.embedded_fonts_cache.is_none() {
1841            let css_hrefs: Vec<String> = self
1842                .metadata
1843                .manifest
1844                .iter()
1845                .filter(|item| item.media_type == "text/css")
1846                .map(|item| item.href.clone())
1847                .collect();
1848            let mut out = Vec::with_capacity(0);
1849            for href in css_hrefs {
1850                let bytes = self.read_resource(&href)?;
1851                let css = String::from_utf8(bytes)
1852                    .map_err(|_| EpubError::Parse(format!("Stylesheet is not UTF-8: {}", href)))?;
1853                out.extend(parse_font_faces_from_css(&href, &css));
1854            }
1855            self.embedded_fonts_cache = Some(out);
1856        }
1857        self.embedded_fonts_cache
1858            .as_ref()
1859            .ok_or_else(|| EpubError::Parse("Embedded font cache initialization failed".into()))
1860    }
1861}
1862
impl EpubBook<File> {
    /// Create a high-level builder for opening/parsing EPUBs.
    ///
    /// Equivalent to `EpubBookBuilder::new()`; exposed on the `File`-backed
    /// book type so callers can start from `EpubBook::builder()`.
    pub fn builder() -> EpubBookBuilder {
        EpubBookBuilder::new()
    }
}
1869
1870fn load_summary_from_zip<R: Read + Seek>(
1871    zip: &mut StreamingZip<R>,
1872    options: EpubBookOptions,
1873) -> Result<EpubSummary, EpubError> {
1874    zip.validate_mimetype().map_err(EpubError::Zip)?;
1875    let container = read_entry(zip, "META-INF/container.xml")?;
1876    let opf_path = crate::metadata::parse_container_xml(&container)?;
1877    let opf = read_entry(zip, &opf_path)?;
1878    let metadata = extract_metadata(&container, &opf)?;
1879    let spine = crate::spine::parse_spine(&opf)?;
1880    validate_open_invariants(&metadata, &spine, options.validation_mode)?;
1881    let navigation = parse_navigation(
1882        zip,
1883        &metadata,
1884        &spine,
1885        &opf_path,
1886        options.validation_mode,
1887        options.max_nav_bytes,
1888    )?;
1889
1890    Ok(EpubSummary {
1891        metadata,
1892        spine,
1893        navigation,
1894    })
1895}
1896
1897fn parse_navigation<R: Read + Seek>(
1898    zip: &mut StreamingZip<R>,
1899    metadata: &EpubMetadata,
1900    spine: &Spine,
1901    opf_path: &str,
1902    validation_mode: ValidationMode,
1903    max_nav_bytes: Option<usize>,
1904) -> Result<Option<Navigation>, EpubError> {
1905    let nav_item = spine
1906        .toc_id()
1907        .and_then(|toc_id| metadata.get_item(toc_id))
1908        .or_else(|| {
1909            metadata.manifest.iter().find(|item| {
1910                item.properties
1911                    .as_deref()
1912                    .is_some_and(|p| p.split_whitespace().any(|prop| prop == "nav"))
1913            })
1914        })
1915        .or_else(|| {
1916            metadata.manifest.iter().find(|item| {
1917                item.media_type == "application/x-dtbncx+xml"
1918                    || item.href.to_ascii_lowercase().ends_with(".ncx")
1919            })
1920        });
1921
1922    let Some(nav_item) = nav_item else {
1923        return Ok(None);
1924    };
1925
1926    let nav_path = resolve_opf_relative_path(opf_path, &nav_item.href);
1927    let nav_bytes = match read_entry(zip, &nav_path) {
1928        Ok(bytes) => bytes,
1929        Err(err) => {
1930            if matches!(validation_mode, ValidationMode::Strict) {
1931                return Err(err);
1932            }
1933            log::warn!("Failed to read navigation document '{}': {}", nav_path, err);
1934            return Ok(None);
1935        }
1936    };
1937
1938    if let Some(limit) = max_nav_bytes {
1939        if nav_bytes.len() > limit {
1940            return Err(EpubError::Phase(PhaseError {
1941                phase: ErrorPhase::Open,
1942                code: "NAV_BYTES_LIMIT",
1943                message: format!(
1944                    "Navigation bytes exceed configured max_nav_bytes ({} > {})",
1945                    nav_bytes.len(),
1946                    limit
1947                )
1948                .into_boxed_str(),
1949                context: Some(Box::new(PhaseErrorContext {
1950                    source: None,
1951                    path: Some(nav_path.clone().into_boxed_str()),
1952                    href: Some(nav_item.href.clone().into_boxed_str()),
1953                    chapter_index: None,
1954                    selector: None,
1955                    selector_index: None,
1956                    declaration: None,
1957                    declaration_index: None,
1958                    token_offset: None,
1959                    limit: Some(Box::new(ErrorLimitContext::new(
1960                        "max_nav_bytes",
1961                        nav_bytes.len(),
1962                        limit,
1963                    ))),
1964                })),
1965            }));
1966        }
1967    }
1968
1969    let parsed = if nav_item.media_type == "application/x-dtbncx+xml"
1970        || nav_item.href.to_ascii_lowercase().ends_with(".ncx")
1971    {
1972        parse_ncx(&nav_bytes)
1973    } else {
1974        parse_nav_xhtml(&nav_bytes)
1975    };
1976
1977    match parsed {
1978        Ok(nav) => Ok(Some(nav)),
1979        Err(err) => {
1980            if matches!(validation_mode, ValidationMode::Strict) {
1981                Err(EpubError::Navigation(err.to_string()))
1982            } else {
1983                log::warn!(
1984                    "Failed to parse navigation document '{}': {}",
1985                    nav_path,
1986                    err
1987                );
1988                Ok(None)
1989            }
1990        }
1991    }
1992}
1993
1994fn validate_open_invariants(
1995    metadata: &EpubMetadata,
1996    spine: &Spine,
1997    validation_mode: ValidationMode,
1998) -> Result<(), EpubError> {
1999    if matches!(validation_mode, ValidationMode::Lenient) {
2000        return Ok(());
2001    }
2002
2003    for item in spine.items() {
2004        if metadata.get_item(&item.idref).is_none() {
2005            return Err(EpubError::ManifestItemMissing {
2006                idref: item.idref.clone(),
2007            });
2008        }
2009    }
2010
2011    Ok(())
2012}
2013
2014fn read_entry<R: Read + Seek>(zip: &mut StreamingZip<R>, path: &str) -> Result<Vec<u8>, EpubError> {
2015    let mut buf = Vec::with_capacity(0);
2016    read_entry_into(zip, path, &mut buf)?;
2017    Ok(buf)
2018}
2019
/// Stream the archive entry at `path` into `writer` without a size cap.
///
/// Delegates to `read_entry_into_with_limit` with `usize::MAX` as the limit
/// and returns its result unchanged.
fn read_entry_into<R: Read + Seek, W: Write>(
    zip: &mut StreamingZip<R>,
    path: &str,
    writer: &mut W,
) -> Result<usize, EpubError> {
    read_entry_into_with_limit(zip, path, writer, usize::MAX)
}
2027
2028fn read_entry_into_with_limit<R: Read + Seek, W: Write>(
2029    zip: &mut StreamingZip<R>,
2030    path: &str,
2031    writer: &mut W,
2032    max_bytes: usize,
2033) -> Result<usize, EpubError> {
2034    let (method, compressed_size, uncompressed_size, local_header_offset, crc32) = {
2035        let entry = zip
2036            .get_entry(path)
2037            .ok_or(EpubError::Zip(ZipError::FileNotFound))?;
2038        (
2039            entry.method,
2040            entry.compressed_size,
2041            entry.uncompressed_size,
2042            entry.local_header_offset,
2043            entry.crc32,
2044        )
2045    };
2046
2047    if uncompressed_size > max_bytes as u64 || compressed_size > max_bytes as u64 {
2048        return Err(EpubError::Zip(ZipError::FileTooLarge));
2049    }
2050    let entry = CdEntry {
2051        method,
2052        compressed_size,
2053        uncompressed_size,
2054        local_header_offset,
2055        crc32,
2056        filename: String::with_capacity(0),
2057    };
2058    zip.read_file_to_writer(&entry, writer)
2059        .map_err(EpubError::Zip)
2060}
2061
2062fn resolve_opf_relative_path(opf_path: &str, href: &str) -> String {
2063    let href = href.split('#').next().unwrap_or(href);
2064    if href.is_empty() {
2065        return normalize_path(opf_path);
2066    }
2067    if href.starts_with('/') {
2068        return normalize_path(href.trim_start_matches('/'));
2069    }
2070    if href.contains("://") {
2071        return href.to_string();
2072    }
2073
2074    let base_dir = opf_path.rsplit_once('/').map(|(dir, _)| dir).unwrap_or("");
2075    if base_dir.is_empty() {
2076        normalize_path(href)
2077    } else {
2078        normalize_path(&format!("{}/{}", base_dir, href))
2079    }
2080}
2081
/// Collapse a `/`-separated path: drop empty and `.` segments and resolve `..`.
///
/// A `..` with nothing left to remove is ignored, so the result never climbs
/// above the root. The result has no leading or trailing slash.
fn normalize_path(path: &str) -> String {
    let mut kept: Vec<&str> = Vec::new();
    for segment in path.split('/') {
        if segment == ".." {
            kept.pop();
        } else if !segment.is_empty() && segment != "." {
            kept.push(segment);
        }
    }
    kept.join("/")
}
2095
/// Decide whether a cover reference points at an (X)HTML document.
///
/// True when the declared media type mentions `html` (case-insensitive; this
/// also covers `xhtml`), or when the path ends in `.xhtml`, `.html` or `.htm`
/// (case-insensitive).
fn is_cover_document(media_type: Option<&str>, path: &str) -> bool {
    let declared_html = media_type.is_some_and(|mt| mt.to_ascii_lowercase().contains("html"));
    if declared_html {
        return true;
    }

    let lowered = path.to_ascii_lowercase();
    [".xhtml", ".html", ".htm"]
        .iter()
        .any(|&ext| lowered.ends_with(ext))
}
2106
/// Guess an image media type from a path's file extension (case-insensitive).
///
/// Returns `None` when the extension is not one of the known image suffixes.
fn infer_image_media_type_from_path(path: &str) -> Option<&'static str> {
    // (suffix, media type) pairs; no suffix is a suffix of another, so the
    // lookup order does not affect the result.
    const BY_SUFFIX: [(&str, &str); 8] = [
        (".jpg", "image/jpeg"),
        (".jpeg", "image/jpeg"),
        (".png", "image/png"),
        (".gif", "image/gif"),
        (".bmp", "image/bmp"),
        (".webp", "image/webp"),
        (".svg", "image/svg+xml"),
        (".svgz", "image/svg+xml"),
    ];

    let lowered = path.to_ascii_lowercase();
    BY_SUFFIX
        .iter()
        .find(|(suffix, _)| lowered.ends_with(*suffix))
        .map(|&(_, media_type)| media_type)
}
2125
2126fn normalized_image_media_type(media_type: Option<&str>, path: &str) -> Option<String> {
2127    media_type
2128        .map(|value| value.trim().to_ascii_lowercase())
2129        .or_else(|| infer_image_media_type_from_path(path).map(ToString::to_string))
2130}
2131
2132fn is_supported_image_resource(
2133    media_type: Option<&str>,
2134    path: &str,
2135    options: ImageReadOptions,
2136) -> bool {
2137    let Some(media) = normalized_image_media_type(media_type, path) else {
2138        return options.allow_unknown_images;
2139    };
2140    if !media.starts_with("image/") {
2141        return false;
2142    }
2143    if !options.allow_svg && media == "image/svg+xml" {
2144        return false;
2145    }
2146    true
2147}
2148
/// Report whether an element's content is excluded from plain-text extraction.
///
/// The comparison is exact (case-sensitive) against a fixed list of element names.
fn should_skip_text_tag(name: &str) -> bool {
    const SKIPPED: [&str; 8] = [
        "script", "style", "head", "nav", "header", "footer", "aside", "noscript",
    ];
    SKIPPED.iter().any(|&tag| tag == name)
}
2155
/// Collapse every run of whitespace into a single space and trim both ends.
///
/// Whitespace-only (or empty) input yields an empty string.
fn normalize_plain_text_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
2175
/// Append `value` to `out` without letting `out` grow beyond `max_bytes` bytes.
///
/// When `value` does not fit, the longest prefix that fits and ends on a
/// UTF-8 character boundary is appended instead.
///
/// Returns `true` when the cap stopped (part of) the write — i.e. `out` was
/// already full or `value` had to be cut — and `false` when `value` was
/// appended in full, or was empty while `out` still had room.
fn push_limited(out: &mut String, value: &str, max_bytes: usize) -> bool {
    let used = out.len();
    if used >= max_bytes {
        return true;
    }
    if value.is_empty() {
        return false;
    }

    let room = max_bytes - used;
    if value.len() <= room {
        out.push_str(value);
        return false;
    }

    // Back off from `room` to the nearest character boundary; index 0 always
    // qualifies, so the search cannot fail.
    let cut = (0..=room)
        .rev()
        .find(|&idx| value.is_char_boundary(idx))
        .unwrap_or(0);
    out.push_str(&value[..cut]);
    true
}
2194
2195fn push_newline_limited(out: &mut String, max_bytes: usize) -> bool {
2196    if out.is_empty() || out.ends_with('\n') {
2197        return false;
2198    }
2199    push_limited(out, "\n", max_bytes)
2200}
2201
2202fn push_text_limited(out: &mut String, text: &str, max_bytes: usize) -> bool {
2203    if text.is_empty() {
2204        return false;
2205    }
2206    if !out.is_empty() && !out.ends_with('\n') && push_limited(out, " ", max_bytes) {
2207        return true;
2208    }
2209    push_limited(out, text, max_bytes)
2210}
2211
/// Stream-parse `html` and append its visible text to `out`, stopping once
/// `out` reaches `max_bytes` bytes.
///
/// - Content nested inside elements for which `should_skip_text_tag` is true
///   is ignored.
/// - Opening and closing `p`/`div`/`li` tags, and empty `br`/`p`/`div`/`li`
///   tags, request a line break via `push_newline_limited`.
/// - Text, CDATA and entity references are whitespace-normalized and appended
///   via `push_text_limited`.
/// - A single trailing line break is removed before returning.
///
/// `out` is appended to, not cleared; callers that want a fresh result clear
/// it first.
///
/// # Errors
/// Returns `EpubError::Parse` when a name/text cannot be decoded, when an
/// entity reference cannot be unescaped, or when the XML reader reports an
/// error.
///
/// NOTE(review): an entity that `quick_xml::escape::unescape` rejects aborts
/// the whole extraction; presumably this includes HTML-only named entities
/// such as `&nbsp;` — confirm whether that strictness is intended.
fn extract_plain_text_limited(
    html: &[u8],
    max_bytes: usize,
    out: &mut String,
) -> Result<(), EpubError> {
    let mut reader = Reader::from_reader(html);
    // Keep whitespace as-is (normalization happens per fragment below) and
    // leave self-closing tags unexpanded; they are handled in the
    // `Event::Empty` arm.
    reader.config_mut().trim_text(false);
    reader.config_mut().expand_empty_elements = false;

    let mut buf = Vec::with_capacity(0);
    // Number of currently open skipped elements; text is emitted only at 0.
    let mut skip_depth = 0usize;
    // Set once a push helper reports that the byte cap was hit.
    let mut done = false;

    while !done {
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(e)) => {
                let name = reader
                    .decoder()
                    .decode(e.name().as_ref())
                    .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?
                    .to_string();
                if should_skip_text_tag(&name) {
                    skip_depth += 1;
                } else if skip_depth == 0
                    && matches!(name.as_str(), "p" | "div" | "li")
                    && push_newline_limited(out, max_bytes)
                {
                    done = true;
                }
            }
            Ok(Event::Empty(e)) => {
                if skip_depth > 0 {
                    // `continue` bypasses the `buf.clear()` at the loop bottom,
                    // so clear here.
                    buf.clear();
                    continue;
                }
                let name = reader
                    .decoder()
                    .decode(e.name().as_ref())
                    .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?
                    .to_string();
                if matches!(name.as_str(), "br" | "p" | "div" | "li")
                    && push_newline_limited(out, max_bytes)
                {
                    done = true;
                }
            }
            Ok(Event::End(e)) => {
                let name = reader
                    .decoder()
                    .decode(e.name().as_ref())
                    .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?
                    .to_string();
                if should_skip_text_tag(&name) {
                    // Saturating: a stray closing tag must not underflow the depth.
                    skip_depth = skip_depth.saturating_sub(1);
                } else if skip_depth == 0
                    && matches!(name.as_str(), "p" | "div" | "li")
                    && push_newline_limited(out, max_bytes)
                {
                    done = true;
                }
            }
            Ok(Event::Text(e)) => {
                if skip_depth > 0 {
                    buf.clear();
                    continue;
                }
                let text = e
                    .decode()
                    .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?
                    .to_string();
                let normalized = normalize_plain_text_whitespace(&text);
                if push_text_limited(out, &normalized, max_bytes) {
                    done = true;
                }
            }
            Ok(Event::CData(e)) => {
                if skip_depth > 0 {
                    buf.clear();
                    continue;
                }
                let text = reader
                    .decoder()
                    .decode(&e)
                    .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?
                    .to_string();
                let normalized = normalize_plain_text_whitespace(&text);
                if push_text_limited(out, &normalized, max_bytes) {
                    done = true;
                }
            }
            Ok(Event::GeneralRef(e)) => {
                if skip_depth > 0 {
                    buf.clear();
                    continue;
                }
                let entity_name = e
                    .decode()
                    .map_err(|err| EpubError::Parse(format!("Decode error: {:?}", err)))?;
                // Rebuild the `&name;` form so the generic unescape routine
                // can resolve it.
                let entity = format!("&{};", entity_name);
                let resolved = quick_xml::escape::unescape(&entity)
                    .map_err(|err| EpubError::Parse(format!("Unescape error: {:?}", err)))?
                    .to_string();
                let normalized = normalize_plain_text_whitespace(&resolved);
                if push_text_limited(out, &normalized, max_bytes) {
                    done = true;
                }
            }
            Ok(Event::Eof) => break,
            // Every other event kind is ignored.
            Ok(_) => {}
            Err(err) => return Err(EpubError::Parse(format!("XML error: {:?}", err))),
        }
        buf.clear();
    }

    // Do not leave a dangling line break at the end of the output.
    if out.ends_with('\n') {
        out.pop();
    }
    Ok(())
}
2331
2332#[cfg(test)]
2333mod tests {
2334    use super::*;
2335    use crate::render_prep::{RenderPrep, RenderPrepOptions, RenderPrepTrace, StyledEventOrRun};
2336
    // Exercises `resolve_opf_relative_path` on the main href shapes.
    #[test]
    fn test_resolve_opf_relative_path() {
        // Plain relative href is joined onto the package directory.
        assert_eq!(
            resolve_opf_relative_path("EPUB/package.opf", "text/ch1.xhtml"),
            "EPUB/text/ch1.xhtml"
        );
        // `..` climbs out of the package directory.
        assert_eq!(
            resolve_opf_relative_path("OEBPS/content.opf", "../toc.ncx"),
            "toc.ncx"
        );
        // Fragment is stripped; a package at the archive root adds no prefix.
        assert_eq!(
            resolve_opf_relative_path("package.opf", "chapter.xhtml#p1"),
            "chapter.xhtml"
        );
        // Leading `/` resolves from the archive root, ignoring the package directory.
        assert_eq!(
            resolve_opf_relative_path("EPUB/package.opf", "/META-INF/container.xml"),
            "META-INF/container.xml"
        );
    }
2356
    // `read_resource_into` must report exactly as many bytes as it wrote, and
    // the fixture's nav document must be non-empty.
    #[test]
    fn test_read_resource_into_streams_to_writer() {
        let file = std::fs::File::open(
            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
        )
        .expect("fixture should open");
        let mut book = EpubBook::from_reader(file).expect("book should open");

        let mut out = Vec::with_capacity(0);
        let n = book
            .read_resource_into("xhtml/nav.xhtml", &mut out)
            .expect("resource should stream");
        assert_eq!(n, out.len());
        assert!(!out.is_empty());
    }
2372
    // An 8-byte hard cap is smaller than the nav document, so the read must
    // fail with `ZipError::FileTooLarge`.
    #[test]
    fn test_read_resource_into_with_hard_cap_errors_when_exceeded() {
        let file = std::fs::File::open(
            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
        )
        .expect("fixture should open");
        let mut book = EpubBook::from_reader(file).expect("book should open");

        let mut out = Vec::with_capacity(0);
        let err = book
            .read_resource_into_with_hard_cap("xhtml/nav.xhtml", &mut out, 8)
            .expect_err("hard cap should fail");
        assert!(matches!(err, EpubError::Zip(ZipError::FileTooLarge)));
    }
2387
    // A 1 MiB limit is expected to admit the nav document; the returned count
    // must match the bytes written.
    #[test]
    fn test_read_resource_into_with_limit_succeeds_when_under_cap() {
        let file = std::fs::File::open(
            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
        )
        .expect("fixture should open");
        let mut book = EpubBook::from_reader(file).expect("book should open");

        let mut out = Vec::with_capacity(0);
        let n = book
            .read_resource_into_with_limit("xhtml/nav.xhtml", &mut out, 1024 * 1024)
            .expect("limit should allow nav payload");
        assert_eq!(n, out.len());
        assert!(!out.is_empty());
    }
2403
    // The accessibility fixture's cover must resolve via the manifest to a
    // JPEG whose archive path ends in `images/cover.jpg`.
    #[test]
    fn test_cover_image_ref_fundamental_fixture() {
        let file = std::fs::File::open(
            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
        )
        .expect("fixture should open");
        let mut book = EpubBook::from_reader(file).expect("book should open");

        let cover = book
            .cover_image_ref()
            .expect("cover resolution should succeed")
            .expect("cover should resolve");
        assert_eq!(cover.source, CoverImageSource::Manifest);
        assert_eq!(cover.media_type.as_deref(), Some("image/jpeg"));
        assert!(cover.zip_path.ends_with("images/cover.jpg"));
    }
2420
    // The Frankenstein fixture's cover must resolve via the manifest to a
    // JPEG whose archive path ends in `_cover.jpg`.
    #[test]
    fn test_cover_image_ref_frankenstein_fixture() {
        let file = std::fs::File::open("tests/fixtures/bench/pg84-frankenstein.epub")
            .expect("fixture should open");
        let mut book = EpubBook::from_reader(file).expect("book should open");

        let cover = book
            .cover_image_ref()
            .expect("cover resolution should succeed")
            .expect("cover should resolve");
        assert_eq!(cover.source, CoverImageSource::Manifest);
        assert_eq!(cover.media_type.as_deref(), Some("image/jpeg"));
        assert!(cover.zip_path.ends_with("_cover.jpg"));
    }
2435
    // With `max_bytes` lowered to 1024 the cover read is expected to be
    // rejected with `ZipError::FileTooLarge`.
    #[test]
    fn test_read_cover_image_into_respects_limit() {
        let file = std::fs::File::open("tests/fixtures/bench/pg84-frankenstein.epub")
            .expect("fixture should open");
        let mut book = EpubBook::from_reader(file).expect("book should open");

        let mut out = Vec::with_capacity(0);
        let err = book
            .read_cover_image_into_with_options(
                &mut out,
                CoverImageOptions {
                    image: ImageReadOptions {
                        max_bytes: 1024,
                        ..ImageReadOptions::default()
                    },
                    ..CoverImageOptions::default()
                },
            )
            .expect_err("oversized cover should fail under hard cap");
        assert!(matches!(err, EpubError::Zip(ZipError::FileTooLarge)));
    }
2457
    // Requesting an XHTML document through the image API must fail with a
    // parse error whose message mentions "not an allowed image".
    #[test]
    fn test_read_image_resource_into_rejects_non_image_payload() {
        let file = std::fs::File::open(
            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
        )
        .expect("fixture should open");
        let mut book = EpubBook::from_reader(file).expect("book should open");
        let mut out = Vec::with_capacity(0);
        let err = book
            .read_image_resource_into_with_options(
                "xhtml/nav.xhtml",
                &mut out,
                ImageReadOptions::default(),
            )
            .expect_err("non-image resources should be rejected");
        match err {
            EpubError::Parse(msg) => {
                assert!(msg.contains("not an allowed image"));
            }
            other => panic!("expected parse error, got {:?}", other),
        }
    }
2480
    // Opening with `max_nav_bytes = 8` must fail with a `NAV_BYTES_LIMIT`
    // phase error whose limit context names `max_nav_bytes` and the value 8.
    #[test]
    fn test_open_enforces_max_nav_bytes_limit() {
        let file = std::fs::File::open(
            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
        )
        .expect("fixture should open");
        // A `match` is used instead of `expect_err` to extract the error.
        let err = match EpubBook::from_reader_with_options(
            file,
            EpubBookOptions {
                max_nav_bytes: Some(8),
                ..EpubBookOptions::default()
            },
        ) {
            Ok(_) => panic!("open should fail when navigation exceeds cap"),
            Err(err) => err,
        };
        match err {
            EpubError::Phase(phase) => {
                assert_eq!(phase.code, "NAV_BYTES_LIMIT");
                let ctx = phase.context.expect("phase context should be present");
                let limit = ctx.limit.expect("limit context should be present");
                assert_eq!(limit.kind.as_ref(), "max_nav_bytes");
                assert_eq!(limit.limit, 8);
            }
            other => panic!("expected phase error, got {:?}", other),
        }
    }
2508
    // With `lazy_navigation` enabled, navigation is absent right after open
    // and becomes available once `ensure_navigation` is called.
    #[test]
    fn test_lazy_navigation_loaded_by_ensure_navigation() {
        let file = std::fs::File::open(
            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
        )
        .expect("fixture should open");
        let mut book = EpubBook::from_reader_with_config(
            file,
            OpenConfig {
                options: EpubBookOptions::default(),
                lazy_navigation: true,
            },
        )
        .expect("book should open");
        assert!(book.navigation().is_none());
        let nav = book
            .ensure_navigation()
            .expect("ensure navigation should parse");
        assert!(nav.is_some());
    }
2529
2530    #[test]
2531    fn test_chapter_text_into_matches_chapter_text() {
2532        let file = std::fs::File::open(
2533            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2534        )
2535        .expect("fixture should open");
2536        let mut book = EpubBook::from_reader(file).expect("book should open");
2537        let baseline = book.chapter_text(0).expect("chapter text should extract");
2538        let mut out = String::with_capacity(0);
2539        book.chapter_text_into(0, &mut out)
2540            .expect("chapter text into should extract");
2541        assert_eq!(baseline, out);
2542    }
2543
2544    #[test]
2545    fn test_chapter_html_into_matches_chapter_html() {
2546        let file = std::fs::File::open(
2547            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2548        )
2549        .expect("fixture should open");
2550        let mut book = EpubBook::from_reader(file).expect("book should open");
2551
2552        let baseline = book.chapter_html(0).expect("chapter html should extract");
2553        let mut out = String::with_capacity(0);
2554        book.chapter_html_into(0, &mut out)
2555            .expect("chapter html into should extract");
2556        assert_eq!(baseline, out);
2557    }
2558
2559    #[test]
2560    fn test_chapter_html_into_with_limit_enforces_cap() {
2561        let file = std::fs::File::open(
2562            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2563        )
2564        .expect("fixture should open");
2565        let mut book = EpubBook::from_reader(file).expect("book should open");
2566
2567        let mut out = String::with_capacity(0);
2568        let err = book
2569            .chapter_html_into_with_limit(0, 8, &mut out)
2570            .expect_err("hard cap should fail");
2571        assert!(matches!(err, EpubError::Zip(ZipError::FileTooLarge)));
2572    }
2573
2574    #[test]
2575    fn test_chapter_text_with_limit_truncates_safely() {
2576        let file = std::fs::File::open(
2577            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2578        )
2579        .expect("fixture should open");
2580        let mut book = EpubBook::from_reader(file).expect("book should open");
2581        let full = book.chapter_text(0).expect("full text should extract");
2582        let limited = book
2583            .chapter_text_with_limit(0, 64)
2584            .expect("limited text should extract");
2585        assert!(limited.len() <= 64);
2586        assert!(full.starts_with(&limited));
2587    }
2588
2589    #[test]
2590    fn test_chapter_text_with_zero_limit_is_empty() {
2591        let file = std::fs::File::open(
2592            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2593        )
2594        .expect("fixture should open");
2595        let mut book = EpubBook::from_reader(file).expect("book should open");
2596        let limited = book
2597            .chapter_text_with_limit(0, 0)
2598            .expect("limited text should extract");
2599        assert!(limited.is_empty());
2600    }
2601
2602    #[test]
2603    fn test_chapter_text_into_with_limit_clears_existing_buffer() {
2604        let file = std::fs::File::open(
2605            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2606        )
2607        .expect("fixture should open");
2608        let mut book = EpubBook::from_reader(file).expect("book should open");
2609        let mut out = String::from("stale content");
2610        book.chapter_text_into_with_limit(0, 32, &mut out)
2611            .expect("limited text should extract");
2612        assert!(!out.starts_with("stale content"));
2613        assert!(out.len() <= 32);
2614    }
2615
2616    #[test]
2617    fn test_extract_plain_text_limited_preserves_utf8_boundaries() {
2618        let html = "<p>hello 😀 world</p>";
2619        let mut out = String::with_capacity(0);
2620        extract_plain_text_limited(html.as_bytes(), 8, &mut out).expect("extract should succeed");
2621        assert!(out.len() <= 8);
2622        assert!(core::str::from_utf8(out.as_bytes()).is_ok());
2623    }
2624
2625    #[test]
2626    fn test_chapter_stylesheets_api_works() {
2627        let file = std::fs::File::open(
2628            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2629        )
2630        .expect("fixture should open");
2631        let mut book = EpubBook::from_reader(file).expect("book should open");
2632        let styles = book
2633            .chapter_stylesheets(0)
2634            .expect("chapter_stylesheets should succeed");
2635        assert!(styles.sources.iter().all(|s| !s.href.is_empty()));
2636    }
2637
2638    #[test]
2639    fn test_styles_for_chapter_alias_matches_with_options() {
2640        let file = std::fs::File::open(
2641            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2642        )
2643        .expect("fixture should open");
2644        let mut book = EpubBook::from_reader(file).expect("book should open");
2645        let limits = StyleLimits::default();
2646        let a = book
2647            .chapter_stylesheets_with_options(0, limits)
2648            .expect("chapter_stylesheets_with_options should succeed");
2649        let b = book
2650            .styles_for_chapter(0, limits)
2651            .expect("styles_for_chapter should succeed");
2652        assert_eq!(a, b);
2653    }
2654
2655    #[test]
2656    fn test_embedded_fonts_api_works() {
2657        let file = std::fs::File::open(
2658            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2659        )
2660        .expect("fixture should open");
2661        let mut book = EpubBook::from_reader(file).expect("book should open");
2662        let fonts = book
2663            .embedded_fonts()
2664            .expect("embedded_fonts should succeed");
2665        assert!(fonts.len() <= crate::render_prep::FontLimits::default().max_faces);
2666    }
2667
2668    #[test]
2669    fn test_embedded_fonts_with_limits_alias_matches_with_options() {
2670        let file = std::fs::File::open(
2671            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2672        )
2673        .expect("fixture should open");
2674        let mut book = EpubBook::from_reader(file).expect("book should open");
2675        let limits = FontLimits::default();
2676        let a = book
2677            .embedded_fonts_with_options(limits)
2678            .expect("embedded_fonts_with_options should succeed");
2679        let b = book
2680            .embedded_fonts_with_limits(limits)
2681            .expect("embedded_fonts_with_limits should succeed");
2682        assert_eq!(a, b);
2683    }
2684
2685    #[test]
2686    fn test_render_prep_golden_path_prepare_chapter() {
2687        let file = std::fs::File::open(
2688            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2689        )
2690        .expect("fixture should open");
2691        let mut book = EpubBook::from_reader(file).expect("book should open");
2692        let mut prep = RenderPrep::new(RenderPrepOptions::default())
2693            .with_serif_default()
2694            .with_embedded_fonts_from_book(&mut book)
2695            .expect("font registration should succeed");
2696        let index = (0..book.chapter_count())
2697            .find(|idx| {
2698                book.chapter_text_with_limit(*idx, 256)
2699                    .map(|s| !s.trim().is_empty())
2700                    .unwrap_or(false)
2701            })
2702            .unwrap_or(0);
2703        let chapter = prep
2704            .prepare_chapter(&mut book, index)
2705            .expect("prepare_chapter should succeed");
2706        assert!(chapter.iter().count() > 0);
2707    }
2708
2709    #[test]
2710    fn test_chapter_styled_runs_api_returns_items() {
2711        let file = std::fs::File::open(
2712            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2713        )
2714        .expect("fixture should open");
2715        let mut book = EpubBook::from_reader(file).expect("book should open");
2716        let index = (0..book.chapter_count())
2717            .find(|idx| {
2718                book.chapter_text_with_limit(*idx, 256)
2719                    .map(|s| !s.trim().is_empty())
2720                    .unwrap_or(false)
2721            })
2722            .unwrap_or(0);
2723        let styled = book
2724            .chapter_styled_runs(index)
2725            .expect("chapter_styled_runs should succeed");
2726        assert!(styled.iter().count() > 0);
2727    }
2728
2729    #[test]
2730    fn test_chapter_events_streaming_emits_items() {
2731        let file = std::fs::File::open(
2732            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2733        )
2734        .expect("fixture should open");
2735        let mut book = EpubBook::from_reader(file).expect("book should open");
2736        let index = (0..book.chapter_count())
2737            .find(|idx| {
2738                book.chapter_text_with_limit(*idx, 256)
2739                    .map(|s| !s.trim().is_empty())
2740                    .unwrap_or(false)
2741            })
2742            .unwrap_or(0);
2743
2744        let mut seen = 0usize;
2745        let emitted = book
2746            .chapter_events(index, ChapterEventsOptions::default(), |_| {
2747                seen += 1;
2748                Ok(())
2749            })
2750            .expect("chapter_events should succeed");
2751        assert_eq!(emitted, seen);
2752        assert!(emitted > 0);
2753    }
2754
2755    #[test]
2756    fn test_chapter_events_respects_max_items_cap() {
2757        let file = std::fs::File::open(
2758            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2759        )
2760        .expect("fixture should open");
2761        let mut book = EpubBook::from_reader(file).expect("book should open");
2762        let index = (0..book.chapter_count())
2763            .find(|idx| {
2764                book.chapter_text_with_limit(*idx, 256)
2765                    .map(|s| !s.trim().is_empty())
2766                    .unwrap_or(false)
2767            })
2768            .unwrap_or(0);
2769
2770        let err = book
2771            .chapter_events(
2772                index,
2773                ChapterEventsOptions {
2774                    max_items: 1,
2775                    ..ChapterEventsOptions::default()
2776                },
2777                |_| Ok(()),
2778            )
2779            .expect_err("max_items cap should fail");
2780        assert!(matches!(err, EpubError::Parse(_)));
2781    }
2782
2783    #[test]
2784    fn test_render_prep_prepare_chapter_into_streams_items() {
2785        let file = std::fs::File::open(
2786            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2787        )
2788        .expect("fixture should open");
2789        let mut book = EpubBook::from_reader(file).expect("book should open");
2790        let index = (0..book.chapter_count())
2791            .find(|idx| {
2792                book.chapter_text_with_limit(*idx, 256)
2793                    .map(|s| !s.trim().is_empty())
2794                    .unwrap_or(false)
2795            })
2796            .unwrap_or(0);
2797        let mut prep = RenderPrep::new(RenderPrepOptions::default())
2798            .with_serif_default()
2799            .with_embedded_fonts_from_book(&mut book)
2800            .expect("font registration should succeed");
2801        let mut out = Vec::with_capacity(0);
2802        prep.prepare_chapter_into(&mut book, index, &mut out)
2803            .expect("prepare_chapter_into should succeed");
2804        assert!(!out.is_empty());
2805    }
2806
2807    #[test]
2808    fn test_render_prep_runs_persist_resolved_font_id() {
2809        let file = std::fs::File::open(
2810            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2811        )
2812        .expect("fixture should open");
2813        let mut book = EpubBook::from_reader(file).expect("book should open");
2814        let index = (0..book.chapter_count())
2815            .find(|idx| {
2816                book.chapter_text_with_limit(*idx, 256)
2817                    .map(|s| !s.trim().is_empty())
2818                    .unwrap_or(false)
2819            })
2820            .unwrap_or(0);
2821        let mut prep = RenderPrep::new(RenderPrepOptions::default())
2822            .with_serif_default()
2823            .with_embedded_fonts_from_book(&mut book)
2824            .expect("font registration should succeed");
2825
2826        let mut saw_run = false;
2827        prep.prepare_chapter_with_trace_context(&mut book, index, |item, trace| {
2828            if let StyledEventOrRun::Run(run) = item {
2829                saw_run = true;
2830                let font_trace = trace.font_trace().expect("run should include font trace");
2831                assert_eq!(run.font_id, font_trace.face.font_id);
2832                assert_eq!(run.resolved_family, font_trace.face.family);
2833            }
2834        })
2835        .expect("prepare_chapter_with_trace_context should succeed");
2836        assert!(saw_run);
2837    }
2838
2839    #[test]
2840    fn test_render_prep_trace_context_contains_font_and_style_for_runs() {
2841        let file = std::fs::File::open(
2842            "tests/fixtures/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
2843        )
2844        .expect("fixture should open");
2845        let mut book = EpubBook::from_reader(file).expect("book should open");
2846        let index = (0..book.chapter_count())
2847            .find(|idx| {
2848                book.chapter_text_with_limit(*idx, 256)
2849                    .map(|s| !s.trim().is_empty())
2850                    .unwrap_or(false)
2851            })
2852            .unwrap_or(0);
2853        let mut prep = RenderPrep::new(RenderPrepOptions::default())
2854            .with_serif_default()
2855            .with_embedded_fonts_from_book(&mut book)
2856            .expect("font registration should succeed");
2857
2858        let mut saw_run = false;
2859        prep.prepare_chapter_with_trace_context(&mut book, index, |item, trace| match item {
2860            StyledEventOrRun::Run(run) => {
2861                saw_run = true;
2862                match trace {
2863                    RenderPrepTrace::Run { style, font } => {
2864                        assert_eq!(style.as_ref(), &run.style);
2865                        assert_eq!(font.face.font_id, run.font_id);
2866                        assert_eq!(font.face.family, run.resolved_family);
2867                    }
2868                    RenderPrepTrace::Event => panic!("run item should produce run trace context"),
2869                }
2870            }
2871            StyledEventOrRun::Event(_) => {
2872                assert!(matches!(trace, RenderPrepTrace::Event));
2873            }
2874            StyledEventOrRun::Image(_) => {
2875                assert!(matches!(trace, RenderPrepTrace::Event));
2876            }
2877        })
2878        .expect("prepare_chapter_with_trace_context should succeed");
2879        assert!(saw_run);
2880    }
2881
2882    #[test]
2883    fn test_reading_session_resolve_locator_and_progress() {
2884        let chapters = vec![
2885            ChapterRef {
2886                index: 0,
2887                idref: "c1".to_string(),
2888                href: "text/ch1.xhtml".to_string(),
2889                media_type: "application/xhtml+xml".to_string(),
2890            },
2891            ChapterRef {
2892                index: 1,
2893                idref: "c2".to_string(),
2894                href: "text/ch2.xhtml".to_string(),
2895                media_type: "application/xhtml+xml".to_string(),
2896            },
2897        ];
2898        let nav = Navigation {
2899            toc: vec![NavPoint {
2900                label: "intro".to_string(),
2901                href: "text/ch2.xhtml#start".to_string(),
2902                children: Vec::with_capacity(0),
2903            }],
2904            page_list: Vec::with_capacity(0),
2905            landmarks: Vec::with_capacity(0),
2906        };
2907        let mut session = ReadingSession::new(chapters, Some(nav));
2908        let resolved = session
2909            .resolve_locator(Locator::TocId("intro".to_string()))
2910            .expect("toc id should resolve");
2911        assert_eq!(resolved.chapter.index, 1);
2912        assert_eq!(resolved.fragment.as_deref(), Some("start"));
2913        assert!(session.book_progress() > 0.0);
2914    }
2915
2916    #[test]
2917    fn test_reading_session_seek_position_out_of_bounds() {
2918        let chapters = vec![ChapterRef {
2919            index: 0,
2920            idref: "c1".to_string(),
2921            href: "text/ch1.xhtml".to_string(),
2922            media_type: "application/xhtml+xml".to_string(),
2923        }];
2924        let mut session = ReadingSession::new(chapters, None);
2925        let err = session
2926            .seek_position(&ReadingPosition {
2927                chapter_index: 2,
2928                chapter_href: None,
2929                anchor: None,
2930                fallback_offset: 0,
2931            })
2932            .expect_err("seek should fail");
2933        assert!(matches!(err, EpubError::ChapterOutOfBounds { .. }));
2934    }
2935}