oxidize_pdf/memory/
mod.rs

1//! Memory optimization module for efficient PDF handling
2//!
3//! This module provides memory-efficient strategies for working with large PDF files,
4//! including lazy loading, streaming, and smart resource management.
5//!
6//! # Features
7//!
8//! - **Lazy Loading**: Load PDF objects only when accessed
9//! - **Memory Mapping**: Use OS memory mapping for large files  
10//! - **Smart Caching**: LRU cache for frequently accessed objects
11//! - **Stream Processing**: Process content without loading entire file
12//! - **Resource Pooling**: Reuse buffers and temporary objects
13//!
14//! # Example
15//!
16//! ```rust,no_run
17//! use oxidize_pdf::memory::{LazyDocument, MemoryOptions};
18//! use oxidize_pdf::parser::PdfReader;
19//!
20//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
21//! // Configure memory options
22//! let options = MemoryOptions::default()
23//!     .with_cache_size(100) // Cache up to 100 objects
24//!     .with_lazy_loading(true)
25//!     .with_memory_mapping(true);
26//!
27//! // Open document with lazy loading
28//! let reader = PdfReader::open("large_document.pdf")?;
29//! let document = LazyDocument::new(reader, options)?;
30//!
31//! // Objects are loaded only when accessed
32//! let page = document.get_page(0)?; // Only loads this page
33//! println!("Page loaded: {}x{}", page.width(), page.height());
34//!
35//! // Memory usage remains low even for large PDFs
36//! # Ok(())
37//! # }
38//! ```
39
40use std::sync::{Arc, RwLock};
41
42pub mod cache;
43pub mod lazy_loader;
44pub mod memory_mapped;
45pub mod stream_processor;
46
47// Re-export main types
48pub use cache::{LruCache, ObjectCache};
49pub use lazy_loader::{LazyDocument, LazyObject};
50pub use memory_mapped::{MappedReader, MemoryMappedFile};
51pub use stream_processor::{ProcessingAction, ProcessingEvent, StreamProcessor, StreamingOptions};
52
/// Configuration options for memory optimization.
///
/// All fields are public, so a value can be written as a struct literal,
/// obtained from [`Default`], or taken from one of the presets defined on this
/// type. `PartialEq`/`Eq` are derived so that two configurations can be
/// compared directly (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemoryOptions {
    /// Enable lazy loading of objects
    pub lazy_loading: bool,
    /// Enable memory mapping for file access
    pub memory_mapping: bool,
    /// Maximum number of objects to cache.
    ///
    /// A value of `0` makes `MemoryManager::new` skip creating a cache.
    pub cache_size: usize,
    /// Enable streaming mode for content
    pub streaming: bool,
    /// Buffer size, in bytes, for streaming operations
    pub buffer_size: usize,
    /// Threshold, in bytes, for using memory mapping
    pub mmap_threshold: usize,
}
69
70impl Default for MemoryOptions {
71    fn default() -> Self {
72        Self {
73            lazy_loading: true,
74            memory_mapping: true,
75            cache_size: 1000,
76            streaming: true,
77            buffer_size: 64 * 1024,           // 64KB
78            mmap_threshold: 10 * 1024 * 1024, // 10MB
79        }
80    }
81}
82
83impl MemoryOptions {
84    /// Create options optimized for small PDFs
85    pub fn small_file() -> Self {
86        Self {
87            lazy_loading: false,
88            memory_mapping: false,
89            cache_size: 0,
90            streaming: false,
91            buffer_size: 16 * 1024,
92            mmap_threshold: usize::MAX,
93        }
94    }
95
96    /// Create options optimized for large PDFs
97    pub fn large_file() -> Self {
98        Self {
99            lazy_loading: true,
100            memory_mapping: true,
101            cache_size: 5000,
102            streaming: true,
103            buffer_size: 256 * 1024,
104            mmap_threshold: 1024 * 1024, // 1MB
105        }
106    }
107
108    /// Enable lazy loading
109    pub fn with_lazy_loading(mut self, enabled: bool) -> Self {
110        self.lazy_loading = enabled;
111        self
112    }
113
114    /// Enable memory mapping
115    pub fn with_memory_mapping(mut self, enabled: bool) -> Self {
116        self.memory_mapping = enabled;
117        self
118    }
119
120    /// Set cache size
121    pub fn with_cache_size(mut self, size: usize) -> Self {
122        self.cache_size = size;
123        self
124    }
125
126    /// Enable streaming
127    pub fn with_streaming(mut self, enabled: bool) -> Self {
128        self.streaming = enabled;
129        self
130    }
131}
132
/// Memory usage statistics.
///
/// A plain snapshot of counters; every field starts at zero via [`Default`].
/// `PartialEq`/`Eq` are derived so that two snapshots can be compared
/// directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MemoryStats {
    /// Total memory allocated, in bytes
    pub allocated_bytes: usize,
    /// Number of cached objects
    pub cached_objects: usize,
    /// Number of cache hits
    pub cache_hits: usize,
    /// Number of cache misses
    pub cache_misses: usize,
    /// Number of lazy loads
    pub lazy_loads: usize,
    /// Memory mapped regions
    pub mapped_regions: usize,
}
149
/// Memory manager for tracking and optimizing memory usage
///
/// Holds the configuration it was created with, a set of usage counters and,
/// when caching is enabled, an [`ObjectCache`].
pub struct MemoryManager {
    /// Configuration supplied at construction.
    ///
    /// NOTE(review): no method visible in this module reads it back, which is
    /// presumably why the `dead_code` lint is silenced — confirm before
    /// removing the attribute.
    #[allow(dead_code)]
    options: MemoryOptions,
    /// Usage counters, kept behind a reference-counted read/write lock.
    stats: Arc<RwLock<MemoryStats>>,
    /// Object cache; `None` when the manager was created with a
    /// `cache_size` of `0`.
    cache: Option<ObjectCache>,
}
157
158impl MemoryManager {
159    /// Create a new memory manager
160    pub fn new(options: MemoryOptions) -> Self {
161        let cache = if options.cache_size > 0 {
162            Some(ObjectCache::new(options.cache_size))
163        } else {
164            None
165        };
166
167        Self {
168            options,
169            stats: Arc::new(RwLock::new(MemoryStats::default())),
170            cache,
171        }
172    }
173
174    /// Get memory statistics
175    pub fn stats(&self) -> MemoryStats {
176        self.stats.read().unwrap().clone()
177    }
178
179    /// Record a memory allocation
180    pub fn record_allocation(&self, bytes: usize) {
181        if let Ok(mut stats) = self.stats.write() {
182            stats.allocated_bytes += bytes;
183        }
184    }
185
186    /// Record a cache hit
187    pub fn record_cache_hit(&self) {
188        if let Ok(mut stats) = self.stats.write() {
189            stats.cache_hits += 1;
190        }
191    }
192
193    /// Record a cache miss
194    pub fn record_cache_miss(&self) {
195        if let Ok(mut stats) = self.stats.write() {
196            stats.cache_misses += 1;
197        }
198    }
199
200    /// Get the object cache
201    pub fn cache(&self) -> Option<&ObjectCache> {
202        self.cache.as_ref()
203    }
204}
205
/// Unit tests for the option presets, the builder methods and the statistics
/// bookkeeping of `MemoryManager`.
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration enables every strategy with mid-range sizes.
    #[test]
    fn test_memory_options_default() {
        let options = MemoryOptions::default();
        assert!(options.lazy_loading);
        assert!(options.memory_mapping);
        assert_eq!(options.cache_size, 1000);
        assert!(options.streaming);
        assert_eq!(options.buffer_size, 64 * 1024);
        assert_eq!(options.mmap_threshold, 10 * 1024 * 1024);
    }

    /// The small-file preset disables every strategy.
    #[test]
    fn test_memory_options_small_file() {
        let options = MemoryOptions::small_file();
        assert!(!options.lazy_loading);
        assert!(!options.memory_mapping);
        assert_eq!(options.cache_size, 0);
        assert!(!options.streaming);
        assert_eq!(options.buffer_size, 16 * 1024);
        assert_eq!(options.mmap_threshold, usize::MAX);
    }

    /// The large-file preset enables every strategy with larger sizes.
    #[test]
    fn test_memory_options_large_file() {
        let options = MemoryOptions::large_file();
        assert!(options.lazy_loading);
        assert!(options.memory_mapping);
        assert_eq!(options.cache_size, 5000);
        assert!(options.streaming);
        assert_eq!(options.buffer_size, 256 * 1024);
        assert_eq!(options.mmap_threshold, 1024 * 1024);
    }

    /// Builder methods override exactly the field they name and leave the
    /// remaining defaults alone.
    #[test]
    fn test_memory_options_builder() {
        let options = MemoryOptions::default()
            .with_lazy_loading(false)
            .with_memory_mapping(false)
            .with_cache_size(500)
            .with_streaming(false);

        assert!(!options.lazy_loading);
        assert!(!options.memory_mapping);
        assert_eq!(options.cache_size, 500);
        assert!(!options.streaming);

        // Fields without a builder call keep their default values.
        assert_eq!(options.buffer_size, 64 * 1024);
        assert_eq!(options.mmap_threshold, 10 * 1024 * 1024);
    }

    /// Freshly created statistics are all zero.
    #[test]
    fn test_memory_stats() {
        let stats = MemoryStats::default();
        assert_eq!(stats.allocated_bytes, 0);
        assert_eq!(stats.cached_objects, 0);
        assert_eq!(stats.cache_hits, 0);
        assert_eq!(stats.cache_misses, 0);
        assert_eq!(stats.lazy_loads, 0);
        assert_eq!(stats.mapped_regions, 0);
    }

    /// Recording events updates the matching counters and nothing else.
    #[test]
    fn test_memory_manager() {
        let options = MemoryOptions::default();
        let manager = MemoryManager::new(options);

        // Test statistics recording
        manager.record_allocation(1024);
        manager.record_cache_hit();
        manager.record_cache_miss();

        let stats = manager.stats();
        assert_eq!(stats.allocated_bytes, 1024);
        assert_eq!(stats.cache_hits, 1);
        assert_eq!(stats.cache_misses, 1);

        // Counters that were never recorded stay at zero.
        assert_eq!(stats.cached_objects, 0);
        assert_eq!(stats.lazy_loads, 0);
        assert_eq!(stats.mapped_regions, 0);

        // Test cache existence
        assert!(manager.cache().is_some());
    }

    /// Repeated recordings accumulate rather than overwrite.
    #[test]
    fn test_memory_manager_accumulates() {
        let manager = MemoryManager::new(MemoryOptions::default());

        manager.record_allocation(100);
        manager.record_allocation(28);
        manager.record_cache_hit();
        manager.record_cache_hit();
        manager.record_cache_miss();

        let stats = manager.stats();
        assert_eq!(stats.allocated_bytes, 128);
        assert_eq!(stats.cache_hits, 2);
        assert_eq!(stats.cache_misses, 1);
    }

    /// A cache size of zero disables the cache entirely.
    #[test]
    fn test_memory_manager_no_cache() {
        let options = MemoryOptions::default().with_cache_size(0);
        let manager = MemoryManager::new(options);

        // Cache should not exist when size is 0
        assert!(manager.cache().is_none());

        // The small-file preset also uses a zero cache size.
        let small = MemoryManager::new(MemoryOptions::small_file());
        assert!(small.cache().is_none());
    }
}