oxidize_pdf/memory/
mod.rs

1//! Memory optimization module for efficient PDF handling
2//!
3//! This module provides memory-efficient strategies for working with large PDF files,
4//! including lazy loading, streaming, and smart resource management.
5//!
6//! # Features
7//!
8//! - **Lazy Loading**: Load PDF objects only when accessed
9//! - **Memory Mapping**: Use OS memory mapping for large files  
10//! - **Smart Caching**: LRU cache for frequently accessed objects
11//! - **Stream Processing**: Process content without loading entire file
12//! - **Resource Pooling**: Reuse buffers and temporary objects
13//!
14//! # Example
15//!
16//! ```rust,no_run
17//! use oxidize_pdf::memory::{LazyDocument, MemoryOptions};
18//! use oxidize_pdf::parser::PdfReader;
19//!
20//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
21//! // Configure memory options
22//! let options = MemoryOptions::default()
23//!     .with_cache_size(100) // Cache up to 100 objects
24//!     .with_lazy_loading(true)
25//!     .with_memory_mapping(true);
26//!
27//! // Open document with lazy loading
28//! let reader = PdfReader::open("large_document.pdf")?;
29//! let document = LazyDocument::new(reader, options)?;
30//!
31//! // Objects are loaded only when accessed
32//! let page = document.get_page(0)?; // Only loads this page
33//! println!("Page loaded: {}x{}", page.width(), page.height());
34//!
35//! // Memory usage remains low even for large PDFs
36//! # Ok(())
37//! # }
38//! ```
39
40use std::sync::{Arc, RwLock};
41
42pub mod cache;
43pub mod lazy_loader;
44pub mod memory_mapped;
45pub mod stream_processor;
46
47// Re-export main types
48pub use cache::{LruCache, ObjectCache};
49pub use lazy_loader::{LazyDocument, LazyObject};
50pub use memory_mapped::{MappedReader, MemoryMappedFile};
51pub use stream_processor::{ProcessingAction, ProcessingEvent, StreamProcessor, StreamingOptions};
52
/// Configuration options for memory optimization.
///
/// Start from [`MemoryOptions::default`], [`MemoryOptions::small_file`] or
/// [`MemoryOptions::large_file`] and adjust individual settings through the
/// `with_*` builder methods or by assigning the public fields directly.
///
/// The type compares by value, so two configurations can be checked for
/// equality (for example in tests or when deciding whether to rebuild state).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemoryOptions {
    /// Enable lazy loading of objects
    pub lazy_loading: bool,
    /// Enable memory mapping for file access
    pub memory_mapping: bool,
    /// Maximum number of objects to cache; with `0`, [`MemoryManager::new`]
    /// does not create an object cache at all
    pub cache_size: usize,
    /// Enable streaming mode for content
    pub streaming: bool,
    /// Buffer size for streaming operations (bytes)
    pub buffer_size: usize,
    /// Threshold for using memory mapping (bytes)
    pub mmap_threshold: usize,
}
69
70impl Default for MemoryOptions {
71    fn default() -> Self {
72        Self {
73            lazy_loading: true,
74            memory_mapping: true,
75            cache_size: 1000,
76            streaming: true,
77            buffer_size: 64 * 1024,           // 64KB
78            mmap_threshold: 10 * 1024 * 1024, // 10MB
79        }
80    }
81}
82
83impl MemoryOptions {
84    /// Create options optimized for small PDFs
85    pub fn small_file() -> Self {
86        Self {
87            lazy_loading: false,
88            memory_mapping: false,
89            cache_size: 0,
90            streaming: false,
91            buffer_size: 16 * 1024,
92            mmap_threshold: usize::MAX,
93        }
94    }
95
96    /// Create options optimized for large PDFs
97    pub fn large_file() -> Self {
98        Self {
99            lazy_loading: true,
100            memory_mapping: true,
101            cache_size: 5000,
102            streaming: true,
103            buffer_size: 256 * 1024,
104            mmap_threshold: 1024 * 1024, // 1MB
105        }
106    }
107
108    /// Enable lazy loading
109    pub fn with_lazy_loading(mut self, enabled: bool) -> Self {
110        self.lazy_loading = enabled;
111        self
112    }
113
114    /// Enable memory mapping
115    pub fn with_memory_mapping(mut self, enabled: bool) -> Self {
116        self.memory_mapping = enabled;
117        self
118    }
119
120    /// Set cache size
121    pub fn with_cache_size(mut self, size: usize) -> Self {
122        self.cache_size = size;
123        self
124    }
125
126    /// Enable streaming
127    pub fn with_streaming(mut self, enabled: bool) -> Self {
128        self.streaming = enabled;
129        self
130    }
131}
132
/// Memory usage statistics.
///
/// A plain snapshot of counters; [`Default`] yields all zeros. The type
/// compares by value so snapshots can be checked against expected values.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MemoryStats {
    /// Total memory allocated, in bytes (accumulated by
    /// `MemoryManager::record_allocation`)
    pub allocated_bytes: usize,
    /// Number of cached objects
    pub cached_objects: usize,
    /// Number of cache hits (incremented by `MemoryManager::record_cache_hit`)
    pub cache_hits: usize,
    /// Number of cache misses (incremented by `MemoryManager::record_cache_miss`)
    pub cache_misses: usize,
    /// Number of lazy loads
    pub lazy_loads: usize,
    /// Memory mapped regions
    pub mapped_regions: usize,
}
149
150/// Memory manager for tracking and optimizing memory usage
151pub struct MemoryManager {
152    #[allow(dead_code)]
153    options: MemoryOptions,
154    stats: Arc<RwLock<MemoryStats>>,
155    cache: Option<ObjectCache>,
156}
157
158impl MemoryManager {
159    /// Create a new memory manager
160    pub fn new(options: MemoryOptions) -> Self {
161        let cache = if options.cache_size > 0 {
162            Some(ObjectCache::new(options.cache_size))
163        } else {
164            None
165        };
166
167        Self {
168            options,
169            stats: Arc::new(RwLock::new(MemoryStats::default())),
170            cache,
171        }
172    }
173
174    /// Get memory statistics
175    pub fn stats(&self) -> MemoryStats {
176        self.stats
177            .read()
178            .map(|stats| stats.clone())
179            .unwrap_or_else(|_| MemoryStats::default())
180    }
181
182    /// Record a memory allocation
183    pub fn record_allocation(&self, bytes: usize) {
184        if let Ok(mut stats) = self.stats.write() {
185            stats.allocated_bytes += bytes;
186        }
187    }
188
189    /// Record a cache hit
190    pub fn record_cache_hit(&self) {
191        if let Ok(mut stats) = self.stats.write() {
192            stats.cache_hits += 1;
193        }
194    }
195
196    /// Record a cache miss
197    pub fn record_cache_miss(&self) {
198        if let Ok(mut stats) = self.stats.write() {
199            stats.cache_misses += 1;
200        }
201    }
202
203    /// Get the object cache
204    pub fn cache(&self) -> Option<&ObjectCache> {
205        self.cache.as_ref()
206    }
207}
208
#[cfg(test)]
mod tests {
    use super::*;

    // Each preset test pins every field so that an accidental change to any
    // default value is caught.

    #[test]
    fn test_memory_options_default() {
        let options = MemoryOptions::default();
        assert!(options.lazy_loading);
        assert!(options.memory_mapping);
        assert_eq!(options.cache_size, 1000);
        assert!(options.streaming);
        assert_eq!(options.buffer_size, 64 * 1024);
        assert_eq!(options.mmap_threshold, 10 * 1024 * 1024);
    }

    #[test]
    fn test_memory_options_small_file() {
        let options = MemoryOptions::small_file();
        assert!(!options.lazy_loading);
        assert!(!options.memory_mapping);
        assert_eq!(options.cache_size, 0);
        assert!(!options.streaming);
        assert_eq!(options.buffer_size, 16 * 1024);
        assert_eq!(options.mmap_threshold, usize::MAX);
    }

    #[test]
    fn test_memory_options_large_file() {
        let options = MemoryOptions::large_file();
        assert!(options.lazy_loading);
        assert!(options.memory_mapping);
        assert_eq!(options.cache_size, 5000);
        assert!(options.streaming);
        assert_eq!(options.buffer_size, 256 * 1024);
        assert_eq!(options.mmap_threshold, 1024 * 1024);
    }

    #[test]
    fn test_memory_options_builder() {
        let options = MemoryOptions::default()
            .with_lazy_loading(false)
            .with_memory_mapping(false)
            .with_cache_size(500)
            .with_streaming(false);

        assert!(!options.lazy_loading);
        assert!(!options.memory_mapping);
        assert_eq!(options.cache_size, 500);
        assert!(!options.streaming);

        // Fields without a builder call must keep their default values.
        assert_eq!(options.buffer_size, 64 * 1024);
        assert_eq!(options.mmap_threshold, 10 * 1024 * 1024);
    }

    #[test]
    fn test_memory_stats() {
        let stats = MemoryStats::default();
        assert_eq!(stats.allocated_bytes, 0);
        assert_eq!(stats.cached_objects, 0);
        assert_eq!(stats.cache_hits, 0);
        assert_eq!(stats.cache_misses, 0);
        assert_eq!(stats.lazy_loads, 0);
        assert_eq!(stats.mapped_regions, 0);
    }

    #[test]
    fn test_memory_manager() {
        let options = MemoryOptions::default();
        let manager = MemoryManager::new(options);

        // Test statistics recording
        manager.record_allocation(1024);
        manager.record_cache_hit();
        manager.record_cache_miss();

        let stats = manager.stats();
        assert_eq!(stats.allocated_bytes, 1024);
        assert_eq!(stats.cache_hits, 1);
        assert_eq!(stats.cache_misses, 1);

        // Counters that were not recorded stay at zero
        assert_eq!(stats.cached_objects, 0);
        assert_eq!(stats.lazy_loads, 0);
        assert_eq!(stats.mapped_regions, 0);

        // Test cache existence
        assert!(manager.cache().is_some());
    }

    #[test]
    fn test_memory_manager_accumulates() {
        let manager = MemoryManager::new(MemoryOptions::default());

        // Repeated calls add up rather than overwrite
        manager.record_allocation(1024);
        manager.record_allocation(512);
        manager.record_cache_hit();
        manager.record_cache_hit();
        manager.record_cache_miss();

        let stats = manager.stats();
        assert_eq!(stats.allocated_bytes, 1536);
        assert_eq!(stats.cache_hits, 2);
        assert_eq!(stats.cache_misses, 1);
    }

    #[test]
    fn test_memory_manager_no_cache() {
        let options = MemoryOptions::default().with_cache_size(0);
        let manager = MemoryManager::new(options);

        // Cache should not exist when size is 0
        assert!(manager.cache().is_none());
    }
}