// epub_stream/streaming.rs
1//! Streaming chapter reader for memory-efficient EPUB processing.
2//!
3//! Provides truly streaming chapter processing that reads directly from ZIP
4//! without materializing the full chapter content.
5
6extern crate alloc;
7
8use alloc::string::{String, ToString};
9use alloc::vec::Vec;
10use core::cmp::min;
11
12#[cfg(feature = "std")]
13use crate::render_prep::{RenderPrepError, RenderPrepOptions, StyledEventOrRun};
14
/// Reusable scratch buffers for streaming operations.
///
/// Holds pre-sized buffers that hot paths can reuse across operations,
/// avoiding repeated allocations.
#[derive(Debug)]
pub struct ScratchBuffers {
    /// Primary buffer for reading chunks from ZIP
    pub read_buf: Vec<u8>,
    /// Buffer for XML parsing events
    pub xml_buf: Vec<u8>,
    /// Buffer for text accumulation
    pub text_buf: String,
}

impl ScratchBuffers {
    /// Construct buffers with the given read and XML capacities.
    ///
    /// The text buffer is always pre-sized at 4 KiB.
    pub fn new(read_capacity: usize, xml_capacity: usize) -> Self {
        let read_buf = Vec::with_capacity(read_capacity);
        let xml_buf = Vec::with_capacity(xml_capacity);
        let text_buf = String::with_capacity(4096);
        Self { read_buf, xml_buf, text_buf }
    }

    /// Small, bounded buffers suitable for embedded targets.
    pub fn embedded() -> Self {
        Self::new(8192, 4096)
    }

    /// Larger buffers tuned for desktop-class throughput.
    pub fn desktop() -> Self {
        Self::new(65536, 32768)
    }

    /// Empty every buffer while keeping its capacity for reuse.
    pub fn clear(&mut self) {
        let Self { read_buf, xml_buf, text_buf } = self;
        read_buf.clear();
        xml_buf.clear();
        text_buf.clear();
    }
}
56
/// Chunking limits for incremental processing.
///
/// Bounding each unit of work prevents single large allocations and keeps
/// progress incremental.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ChunkLimits {
    /// Maximum bytes to process in a single read operation.
    pub max_read_chunk: usize,
    /// Maximum bytes for accumulated text before forcing a flush.
    pub max_text_accumulation: usize,
    /// Maximum number of events to process before yielding control.
    pub max_events_per_yield: usize,
    /// Maximum depth for element stack.
    pub max_stack_depth: usize,
}

impl Default for ChunkLimits {
    /// Defaults sized for desktop-class hosts: 16 KiB reads, 8 KiB text,
    /// 1000 events per yield, 256 levels of nesting.
    fn default() -> Self {
        Self {
            max_read_chunk: 16 * 1024,
            max_text_accumulation: 8 * 1024,
            max_events_per_yield: 1000,
            max_stack_depth: 256,
        }
    }
}

impl ChunkLimits {
    /// Conservative limits for embedded environments: 4 KiB reads, 2 KiB
    /// text, 500 events per yield, 64 levels of nesting.
    pub fn embedded() -> Self {
        Self {
            max_read_chunk: 4 * 1024,
            max_text_accumulation: 2 * 1024,
            max_events_per_yield: 500,
            max_stack_depth: 64,
        }
    }
}
94
/// Stateful pagination context for resumable page layout.
///
/// Tracks parsing/layout state so page N+1 can continue from where
/// page N left off without re-parsing from the start.
#[derive(Clone, Debug)]
pub struct PaginationContext {
    /// Current byte offset in the source document.
    pub byte_offset: usize,
    /// Current event/token index.
    pub event_index: usize,
    /// Current element stack (path from root to current element).
    pub element_stack: Vec<String>,
    /// Accumulated text since last page break.
    pub text_accumulator: String,
    /// Current page number.
    pub page_number: usize,
}

impl Default for PaginationContext {
    fn default() -> Self {
        Self {
            byte_offset: 0,
            event_index: 0,
            // Pre-size for typical nesting depth / page text to avoid
            // early reallocations.
            element_stack: Vec::with_capacity(32),
            text_accumulator: String::with_capacity(4096),
            page_number: 0,
        }
    }
}

impl PaginationContext {
    /// Create a new context for starting at the beginning.
    pub fn new() -> Self {
        Self::default()
    }

    /// Reset all state for a new chapter, keeping buffer capacity.
    pub fn reset(&mut self) {
        self.byte_offset = 0;
        self.event_index = 0;
        self.element_stack.clear();
        self.text_accumulator.clear();
        self.page_number = 0;
    }

    /// Advance to the next page, discarding the accumulated text.
    pub fn next_page(&mut self) {
        self.page_number += 1;
        self.text_accumulator.clear();
    }

    /// Update byte offset by `bytes`.
    pub fn advance_bytes(&mut self, bytes: usize) {
        self.byte_offset += bytes;
    }

    /// Update event index by `events`.
    pub fn advance_events(&mut self, events: usize) {
        self.event_index += events;
    }

    /// Push element onto stack.
    pub fn push_element(&mut self, tag: &str) {
        self.element_stack.push(tag.to_string());
    }

    /// Pop element from stack.
    pub fn pop_element(&mut self) -> Option<String> {
        self.element_stack.pop()
    }

    /// Accumulate text, capping the accumulator at `max_len` bytes.
    ///
    /// Fix: the previous implementation sliced `text` at an arbitrary byte
    /// index (`&text[..min(text.len(), remaining)]`), which panics when the
    /// cut lands inside a multi-byte UTF-8 character. Truncation now backs
    /// up to the nearest char boundary, so the cap may be undershot by up
    /// to three bytes but never panics.
    pub fn append_text(&mut self, text: &str, max_len: usize) {
        let remaining = max_len.saturating_sub(self.text_accumulator.len());
        if remaining == 0 {
            return;
        }
        if text.len() <= remaining {
            self.text_accumulator.push_str(text);
        } else {
            // Back up to a char boundary so the slice below cannot panic.
            let mut end = remaining;
            while !text.is_char_boundary(end) {
                end -= 1;
            }
            self.text_accumulator.push_str(&text[..end]);
        }
    }
}
175
/// Memory chunk allocator for bounded allocations.
///
/// Hands out fixed-capacity byte buffers from a bounded pool so callers
/// never need one large contiguous allocation.
pub struct ChunkAllocator {
    chunk_size: usize,
    max_chunks: usize,
    chunks: Vec<Vec<u8>>,
    allocated: usize,
}

impl ChunkAllocator {
    /// Build an allocator that hands out at most `max_chunks` buffers of
    /// `chunk_size` bytes each.
    pub fn new(chunk_size: usize, max_chunks: usize) -> Self {
        Self {
            chunk_size,
            max_chunks,
            chunks: Vec::with_capacity(max_chunks),
            allocated: 0,
        }
    }

    /// Hand out a pooled chunk, or allocate a fresh one while under budget.
    ///
    /// Returns `None` once `max_chunks` buffers have been allocated and
    /// none are currently pooled.
    pub fn acquire(&mut self) -> Option<Vec<u8>> {
        match self.chunks.pop() {
            Some(recycled) => Some(recycled),
            None if self.allocated < self.max_chunks => {
                self.allocated += 1;
                Some(Vec::with_capacity(self.chunk_size))
            }
            None => None,
        }
    }

    /// Return a chunk to the pool for reuse; it is cleared before pooling
    /// and silently dropped if the pool is already full.
    pub fn release(&mut self, mut chunk: Vec<u8>) {
        if self.chunks.len() < self.max_chunks {
            chunk.clear();
            self.chunks.push(chunk);
        }
    }

    /// Number of chunks currently sitting in the pool.
    pub fn available(&self) -> usize {
        self.chunks.len()
    }
}
222
/// Statistics for streaming operations.
///
/// Plain counter struct; `Default` starts every field at zero. All fields
/// are public, so callers update them directly.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct StreamingStats {
    /// Total bytes read from source.
    pub bytes_read: usize,
    /// Total bytes processed.
    pub bytes_processed: usize,
    /// Number of events emitted.
    pub events_emitted: usize,
    /// Number of chunks processed.
    pub chunks_processed: usize,
    /// Peak memory usage estimate, in bytes.
    pub peak_memory_estimate: usize,
}
237
/// Streaming chapter processor that reads incrementally from ZIP.
///
/// This type provides true streaming without materializing the full
/// chapter content in memory.
///
/// NOTE(review): both fields are `#[allow(dead_code)]` because the current
/// `process_chunk` is a placeholder that does not read them yet; remove the
/// allows once incremental parsing is implemented.
pub struct StreamingChapterProcessor {
    #[allow(dead_code)]
    limits: ChunkLimits,
    #[allow(dead_code)]
    state: StreamingParseState,
}
248
/// Current state of streaming parse.
///
/// Drives `StreamingChapterProcessor`: starts at `Initial`, and the
/// placeholder `process_chunk` currently jumps straight to `Complete`.
#[derive(Clone, Debug)]
#[allow(dead_code)] // `Parsing`/`Error` payloads are unread by the placeholder
enum StreamingParseState {
    /// Initial state, ready to start.
    Initial,
    /// Parsing in progress with partial content buffered.
    Parsing {
        /// Bytes processed so far
        bytes_processed: usize,
        /// Events emitted so far
        events_emitted: usize,
        /// Current element stack depth
        stack_depth: usize,
    },
    /// Parsing complete.
    Complete,
    /// Error occurred during parsing; the payload is a human-readable message.
    Error(String),
}
269
#[cfg(feature = "std")]
impl StreamingChapterProcessor {
    /// Build a processor with the given chunking limits.
    ///
    /// The render-prep options are unused for now but are part of the
    /// stable constructor signature.
    pub fn new(_options: RenderPrepOptions, limits: ChunkLimits) -> Self {
        Self {
            limits,
            state: StreamingParseState::Initial,
        }
    }

    /// Process a chunk of HTML bytes and emit styled items.
    ///
    /// Returns the number of items emitted. When the chunk is exhausted
    /// but the document is not complete, returns `Ok(0)` to indicate
    /// more data is needed.
    ///
    /// Placeholder: marks the parse complete without emitting anything.
    /// A full implementation would use incremental XML parsing.
    pub fn process_chunk<F>(
        &mut self,
        _html_chunk: &[u8],
        _on_item: F,
    ) -> Result<usize, RenderPrepError>
    where
        F: FnMut(StyledEventOrRun),
    {
        self.state = StreamingParseState::Complete;
        Ok(0)
    }

    /// Whether parsing has reached the `Complete` state.
    pub fn is_complete(&self) -> bool {
        matches!(self.state, StreamingParseState::Complete)
    }
}
307
#[cfg(test)]
mod tests {
    use super::*;

    // Fix: the original asserted `capacity() <= requested` (and an exact
    // equality for ChunkAllocator), but `Vec::with_capacity` only
    // guarantees *at least* the requested capacity, so those assertions
    // could fail on a conforming allocator. Assert lower bounds instead.

    #[test]
    fn test_scratch_buffers_embedded() {
        let buffers = ScratchBuffers::embedded();
        assert!(buffers.read_buf.capacity() >= 8192);
        assert!(buffers.xml_buf.capacity() >= 4096);
    }

    #[test]
    fn test_scratch_buffers_clear_preserves_capacity() {
        let mut buffers = ScratchBuffers::desktop();
        let read_cap = buffers.read_buf.capacity();

        buffers.read_buf.extend_from_slice(b"test data");
        buffers.clear();

        assert!(buffers.read_buf.is_empty());
        assert_eq!(buffers.read_buf.capacity(), read_cap);
    }

    #[test]
    fn test_pagination_context_basic() {
        let mut ctx = PaginationContext::new();
        assert_eq!(ctx.page_number, 0);
        assert_eq!(ctx.byte_offset, 0);

        ctx.advance_bytes(100);
        assert_eq!(ctx.byte_offset, 100);

        ctx.next_page();
        assert_eq!(ctx.page_number, 1);
        assert!(ctx.text_accumulator.is_empty());
    }

    #[test]
    fn test_pagination_context_stack() {
        let mut ctx = PaginationContext::new();
        ctx.push_element("html");
        ctx.push_element("body");
        ctx.push_element("p");

        assert_eq!(ctx.element_stack.len(), 3);
        assert_eq!(ctx.pop_element(), Some("p".to_string()));
        assert_eq!(ctx.element_stack.len(), 2);
    }

    #[test]
    fn test_pagination_context_append_text_caps_length() {
        let mut ctx = PaginationContext::new();
        ctx.append_text("hello", 3);
        assert_eq!(ctx.text_accumulator, "hel");
        // At the cap, further appends are no-ops.
        ctx.append_text("world", 3);
        assert_eq!(ctx.text_accumulator, "hel");
    }

    #[test]
    fn test_chunk_allocator_basic() {
        let mut allocator = ChunkAllocator::new(1024, 10);
        assert_eq!(allocator.available(), 0);

        let chunk = allocator.acquire().unwrap();
        assert!(chunk.capacity() >= 1024);

        allocator.release(chunk);
        assert_eq!(allocator.available(), 1);
    }

    #[test]
    fn test_chunk_allocator_exhaustion() {
        let mut allocator = ChunkAllocator::new(1024, 2);
        let _ = allocator.acquire();
        let _ = allocator.acquire();
        assert!(allocator.acquire().is_none()); // Exhausted
    }
}