ddex_builder/memory_optimization.rs

//! Memory optimization techniques for DDEX Builder
//!
//! This module provides arena allocation, object pooling, and memory-efficient
//! data structures to minimize memory usage and improve performance.
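//!
//! A minimal usage sketch (illustrative only; assumes this module is exported
//! as `ddex_builder::memory_optimization`):
//!
//! ```rust,ignore
//! use ddex_builder::memory_optimization::BuildMemoryManager;
//!
//! let manager = BuildMemoryManager::new();
//! let mut buffer = manager.buffer_pool.get();            // pooled 8KB Vec<u8>
//! buffer.get_mut().extend_from_slice(b"<NewReleaseMessage/>");
//! drop(buffer);                                          // returned to the pool on drop
//! manager.reset_for_next_build();                        // arena chunks are kept for reuse
//! ```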

use std::cell::RefCell;
use std::collections::VecDeque;
use std::mem;
use indexmap::IndexMap;

/// Arena allocator for temporary objects during build process
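///
/// A small usage sketch (illustrative; crate path assumed):
///
/// ```rust,ignore
/// use ddex_builder::memory_optimization::Arena;
///
/// let arena = Arena::new(4096);            // 4 KiB chunks
/// let value = arena.alloc(42u64);          // boxed value; its size is tracked by the arena
/// assert_eq!(*value, 42);
/// assert!(arena.allocated_bytes() >= 8);   // statistics reflect the allocation
/// arena.reset();                           // keep chunks, clear offsets for reuse
/// ```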
pub struct Arena {
    chunks: RefCell<Vec<Vec<u8>>>,
    current_chunk: RefCell<usize>,
    current_offset: RefCell<usize>,
    chunk_size: usize,
}

impl Arena {
    /// Create a new arena with specified chunk size
    pub fn new(chunk_size: usize) -> Self {
        Self {
            chunks: RefCell::new(vec![Vec::with_capacity(chunk_size)]),
            current_chunk: RefCell::new(0),
            current_offset: RefCell::new(0),
            chunk_size,
        }
    }

    /// Allocate space for a value in the arena (safe version using Box)
    pub fn alloc<T>(&self, value: T) -> Box<T> {
        // For security audit compliance, use safe Box allocation instead of raw pointers
        // Track the allocation in our chunks for statistics
        let size = std::mem::size_of::<T>();

        {
            let mut chunks = self.chunks.borrow_mut();
            if chunks.is_empty() || chunks.last().unwrap().len() + size > self.chunk_size {
                // Need a new chunk
                chunks.push(Vec::with_capacity(self.chunk_size));
                *self.current_chunk.borrow_mut() = chunks.len() - 1;
                *self.current_offset.borrow_mut() = 0;
            }

            // Record the allocation in the current chunk
            let current_chunk_idx = *self.current_chunk.borrow();
            if let Some(chunk) = chunks.get_mut(current_chunk_idx) {
                // Simulate allocation by adding to chunk length
                chunk.resize(chunk.len() + size, 0);
                *self.current_offset.borrow_mut() += size;
            }
        }

        Box::new(value)
    }

    /// Get total allocated memory
    pub fn allocated_bytes(&self) -> usize {
        self.chunks.borrow().iter()
            .map(|chunk| chunk.len())
            .sum()
    }

    /// Get total capacity
    pub fn capacity_bytes(&self) -> usize {
        self.chunks.borrow().iter()
            .map(|chunk| chunk.capacity())
            .sum()
    }

    /// Reset arena for reuse (keeps allocated chunks)
    pub fn reset(&self) {
        let mut chunks = self.chunks.borrow_mut();
        for chunk in chunks.iter_mut() {
            chunk.clear();
        }
        *self.current_chunk.borrow_mut() = 0;
        *self.current_offset.borrow_mut() = 0;
    }

    /// Clear all chunks and free memory
    pub fn clear(&self) {
        self.chunks.borrow_mut().clear();
        *self.current_chunk.borrow_mut() = 0;
        *self.current_offset.borrow_mut() = 0;
    }
}

/// Object pool for frequently created/destroyed types
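///
/// A small usage sketch (illustrative; crate path assumed):
///
/// ```rust,ignore
/// use ddex_builder::memory_optimization::ObjectPool;
///
/// let pool: ObjectPool<Vec<u8>> = ObjectPool::new(|| Vec::with_capacity(1024), 8);
/// {
///     let mut buf = pool.get();   // pops a pooled buffer or calls the factory
///     buf.get_mut().push(0xFF);
/// }                               // dropped here and returned to the pool
/// assert_eq!(pool.size(), 1);
/// ```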
pub struct ObjectPool<T> {
    objects: RefCell<VecDeque<T>>,
    factory: Box<dyn Fn() -> T>,
    max_size: usize,
}

impl<T> ObjectPool<T> {
    /// Create a new object pool
    pub fn new<F>(factory: F, max_size: usize) -> Self
    where
        F: Fn() -> T + 'static,
    {
        Self {
            objects: RefCell::new(VecDeque::new()),
            factory: Box::new(factory),
            max_size,
        }
    }

    /// Get an object from the pool (or create new one)
    pub fn get(&self) -> PooledObject<'_, T> {
        let obj = self.objects.borrow_mut()
            .pop_front()
            .unwrap_or_else(|| (self.factory)());

        PooledObject {
            object: Some(obj),
            pool: self,
        }
    }

    /// Return an object to the pool
    fn return_object(&self, obj: T) {
        let mut objects = self.objects.borrow_mut();
        if objects.len() < self.max_size {
            objects.push_back(obj);
        }
        // If pool is full, drop the object
    }

    /// Get current pool size
    pub fn size(&self) -> usize {
        self.objects.borrow().len()
    }

    /// Clear the pool
    pub fn clear(&self) {
        self.objects.borrow_mut().clear();
    }
}

/// RAII wrapper for pooled objects
pub struct PooledObject<'a, T> {
    object: Option<T>,
    pool: &'a ObjectPool<T>,
}

impl<'a, T> PooledObject<'a, T> {
    /// Get mutable reference to the pooled object
    pub fn get_mut(&mut self) -> &mut T {
        self.object.as_mut().unwrap()
    }

    /// Get immutable reference to the pooled object
    pub fn get(&self) -> &T {
        self.object.as_ref().unwrap()
    }
}

impl<'a, T> Drop for PooledObject<'a, T> {
    fn drop(&mut self) {
        if let Some(obj) = self.object.take() {
            self.pool.return_object(obj);
        }
    }
}

/// Compact representation for DDEX elements to reduce memory usage
#[derive(Debug, Clone)]
pub struct CompactElement {
    /// Name index in string table
    name_idx: u32,
    /// Namespace index (optional)
    namespace_idx: Option<u32>,
    /// Attributes (packed)
    attributes: CompactAttributes,
    /// Children indices
    children: Vec<CompactNodeRef>,
}

/// Compact node reference (union type)
#[derive(Debug, Clone)]
pub enum CompactNodeRef {
    Element(u32), // Index in element table
    Text(u32),    // Index in string table
    Comment(u32), // Index in string table
}

/// Compact attributes storage
#[derive(Debug, Clone, Default)]
pub struct CompactAttributes {
    /// Packed attribute data: (key_idx, value_idx) pairs
    data: Vec<(u32, u32)>,
}

impl CompactAttributes {
    /// Add an attribute
    pub fn insert(&mut self, key_idx: u32, value_idx: u32) {
        self.data.push((key_idx, value_idx));
    }

    /// Get number of attributes
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Check if empty
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Iterate over attributes
    pub fn iter(&self) -> impl Iterator<Item = (u32, u32)> + '_ {
        self.data.iter().copied()
    }
}

/// Compact AST representation for memory efficiency
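///
/// String interning stores each distinct string once, so repeated element and
/// attribute names collapse to `u32` indices (illustrative sketch; crate path assumed):
///
/// ```rust,ignore
/// use ddex_builder::memory_optimization::CompactAST;
///
/// let mut ast = CompactAST::new();
/// let a = ast.intern_string("ReleaseId");
/// let b = ast.intern_string("ReleaseId"); // deduplicated: same index returned
/// assert_eq!(a, b);
/// assert_eq!(ast.get_string(a), Some("ReleaseId"));
/// ```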
#[derive(Debug)]
pub struct CompactAST {
    /// String table for all text content
    strings: Vec<String>,
    /// String lookup map
    string_map: IndexMap<String, u32>,
    /// Element table
    elements: Vec<CompactElement>,
    /// Root element index
    root_idx: u32,
    /// Namespace table
    namespaces: Vec<(u32, u32)>, // (prefix_idx, uri_idx) pairs
    /// Schema location index
    schema_location_idx: Option<u32>,
}

impl CompactAST {
    /// Create a new compact AST
    pub fn new() -> Self {
        Self {
            strings: Vec::new(),
            string_map: IndexMap::new(),
            elements: Vec::new(),
            root_idx: 0,
            namespaces: Vec::new(),
            schema_location_idx: None,
        }
    }

    /// Intern a string and return its index
    pub fn intern_string(&mut self, s: &str) -> u32 {
        if let Some(&idx) = self.string_map.get(s) {
            return idx;
        }

        let idx = self.strings.len() as u32;
        self.strings.push(s.to_string());
        self.string_map.insert(s.to_string(), idx);
        idx
    }

    /// Get string by index
    pub fn get_string(&self, idx: u32) -> Option<&str> {
        self.strings.get(idx as usize).map(|s| s.as_str())
    }

    /// Add an element and return its index
    pub fn add_element(&mut self, element: CompactElement) -> u32 {
        let idx = self.elements.len() as u32;
        self.elements.push(element);
        idx
    }

    /// Get element by index
    pub fn get_element(&self, idx: u32) -> Option<&CompactElement> {
        self.elements.get(idx as usize)
    }

    /// Calculate memory footprint
    pub fn memory_footprint(&self) -> usize {
        let strings_size = self.strings.iter()
            .map(|s| s.len())
            .sum::<usize>();

        let map_size = self.string_map.len() *
            (mem::size_of::<String>() + mem::size_of::<u32>());

        let elements_size = self.elements.len() * mem::size_of::<CompactElement>();

        strings_size + map_size + elements_size
    }

    /// Convert from regular AST (memory optimization pass)
    pub fn from_ast(ast: &crate::ast::AST) -> Self {
        let mut compact = CompactAST::new();

        // Intern namespace strings
        for (prefix, uri) in &ast.namespaces {
            let prefix_idx = compact.intern_string(prefix);
            let uri_idx = compact.intern_string(uri);
            compact.namespaces.push((prefix_idx, uri_idx));
        }

        // Intern schema location if present
        if let Some(ref location) = ast.schema_location {
            compact.schema_location_idx = Some(compact.intern_string(location));
        }

        // Convert root element
        compact.root_idx = compact.convert_element(&ast.root);

        compact
    }

    /// Convert an element to compact format
    fn convert_element(&mut self, element: &crate::ast::Element) -> u32 {
        let name_idx = self.intern_string(&element.name);
        let namespace_idx = element.namespace.as_ref()
            .map(|ns| self.intern_string(ns));

        // Convert attributes
        let mut attributes = CompactAttributes::default();
        for (key, value) in &element.attributes {
            let key_idx = self.intern_string(key);
            let value_idx = self.intern_string(value);
            attributes.insert(key_idx, value_idx);
        }

        // Convert children (placeholder - would need full recursive conversion)
        let children = Vec::new(); // Simplified for now

        let compact_element = CompactElement {
            name_idx,
            namespace_idx,
            attributes,
            children,
        };

        self.add_element(compact_element)
    }
}

/// Lazy-loaded data structures for optional fields
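///
/// The loader closure runs on first access only; the result is cached
/// (illustrative sketch; crate path assumed):
///
/// ```rust,ignore
/// use ddex_builder::memory_optimization::LazyField;
///
/// let field = LazyField::new(|| "expensive default".to_string());
/// assert!(!field.is_loaded());
/// let value = field.get();    // loader runs here and the value is cached
/// assert_eq!(*value, "expensive default");
/// assert!(field.is_loaded());
/// ```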
pub struct LazyField<T> {
    value: RefCell<Option<T>>,
    loader: Box<dyn Fn() -> T>,
}

impl<T: std::fmt::Debug> std::fmt::Debug for LazyField<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("LazyField")
            .field("value", &self.value)
            .field("loader", &"<function>")
            .finish()
    }
}

impl<T> LazyField<T> {
    /// Create a new lazy field
    pub fn new<F>(loader: F) -> Self
    where
        F: Fn() -> T + 'static,
    {
        Self {
            value: RefCell::new(None),
            loader: Box::new(loader),
        }
    }

    /// Get the value, loading if necessary
    pub fn get(&self) -> std::cell::Ref<'_, T> {
        if self.value.borrow().is_none() {
            *self.value.borrow_mut() = Some((self.loader)());
        }

        std::cell::Ref::map(self.value.borrow(), |opt| opt.as_ref().unwrap())
    }

    /// Check if value is loaded
    pub fn is_loaded(&self) -> bool {
        self.value.borrow().is_some()
    }

    /// Clear the loaded value
    pub fn clear(&self) {
        *self.value.borrow_mut() = None;
    }
}

/// Memory manager for the entire build process
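///
/// Usage sketch (illustrative; crate path assumed): one manager per builder,
/// reset between builds.
///
/// ```rust,ignore
/// use ddex_builder::memory_optimization::BuildMemoryManager;
///
/// let manager = BuildMemoryManager::new();
/// let element = manager.element_pool.get();   // reused `ast::Element` instance
/// // ... populate the element while building ...
/// drop(element);                              // returned to the pool
/// let stats = manager.memory_usage();
/// assert!(stats.total_bytes() > 0);
/// manager.reset_for_next_build();
/// ```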
pub struct BuildMemoryManager {
    /// Arena for temporary allocations
    pub arena: Arena,
    /// Element pool
    pub element_pool: ObjectPool<crate::ast::Element>,
    /// String pool for small strings
    pub small_string_pool: ObjectPool<String>,
    /// Large buffer pool for XML generation
    pub buffer_pool: ObjectPool<Vec<u8>>,
}

impl BuildMemoryManager {
    /// Create a new memory manager optimized for typical DDEX builds
    pub fn new() -> Self {
        Self {
            arena: Arena::new(64 * 1024), // 64KB chunks
            element_pool: ObjectPool::new(
                || crate::ast::Element::new(""),
                100 // Keep up to 100 elements
            ),
            small_string_pool: ObjectPool::new(
                || String::with_capacity(64),
                50 // Keep up to 50 small strings
            ),
            buffer_pool: ObjectPool::new(
                || Vec::with_capacity(8192), // 8KB buffers
                10 // Keep up to 10 buffers
            ),
        }
    }

    /// Get memory usage statistics
    pub fn memory_usage(&self) -> MemoryStats {
        MemoryStats {
            arena_allocated: self.arena.allocated_bytes(),
            arena_capacity: self.arena.capacity_bytes(),
            element_pool_size: self.element_pool.size(),
            string_pool_size: self.small_string_pool.size(),
            buffer_pool_size: self.buffer_pool.size(),
        }
    }

    /// Reset manager for next build (keeps pools)
    pub fn reset_for_next_build(&self) {
        self.arena.reset();
        // Pools reset automatically when objects are returned
    }

    /// Full reset including all pools
    pub fn full_reset(&self) {
        self.arena.clear();
        self.element_pool.clear();
        self.small_string_pool.clear();
        self.buffer_pool.clear();
    }
}

impl Default for BuildMemoryManager {
    fn default() -> Self {
        Self::new()
    }
}

/// Memory usage statistics
#[derive(Debug, Clone)]
pub struct MemoryStats {
    pub arena_allocated: usize,
    pub arena_capacity: usize,
    pub element_pool_size: usize,
    pub string_pool_size: usize,
    pub buffer_pool_size: usize,
}

impl MemoryStats {
    /// Get total memory usage estimate
    pub fn total_bytes(&self) -> usize {
        self.arena_capacity +
        (self.element_pool_size * mem::size_of::<crate::ast::Element>()) +
        (self.string_pool_size * 64) + // Estimated string size
        (self.buffer_pool_size * 8192) // Buffer size
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_arena_allocation() {
        let arena = Arena::new(1024);

        let val1 = arena.alloc(42u32);
        let val2 = arena.alloc("hello world".to_string());

        assert_eq!(*val1, 42);
        assert_eq!(*val2, "hello world");

        assert!(arena.allocated_bytes() > 0);
    }

    #[test]
    fn test_object_pool() {
        let pool = ObjectPool::new(|| String::with_capacity(32), 5);

        {
            let mut obj1 = pool.get();
            obj1.get_mut().push_str("test");
            assert_eq!(obj1.get(), "test");

            {
                // Underscore-prefixed so the checked-out guard doesn't trigger
                // an unused-variable warning; it is still dropped at scope end.
                let _obj2 = pool.get();
                assert_eq!(pool.size(), 0); // Both objects checked out
            }
            // _obj2 returned to pool
        }
        // obj1 returned to pool

        assert_eq!(pool.size(), 2);
    }

    #[test]
    fn test_compact_ast() {
        let mut compact = CompactAST::new();

        let hello_idx = compact.intern_string("hello");
        let hello_idx2 = compact.intern_string("hello"); // Should reuse
        let world_idx = compact.intern_string("world");

        assert_eq!(hello_idx, hello_idx2);
        assert_ne!(hello_idx, world_idx);
        assert_eq!(compact.get_string(hello_idx), Some("hello"));
        assert_eq!(compact.get_string(world_idx), Some("world"));
    }

    #[test]
    fn test_lazy_field() {
        use std::rc::Rc;

        // Share the counter with the loader via Rc so the test can actually
        // assert how many times the loader ran.
        let counter = Rc::new(RefCell::new(0));
        let loader_counter = Rc::clone(&counter);
        let lazy = LazyField::new(move || {
            *loader_counter.borrow_mut() += 1;
            "computed".to_string()
        });

        assert!(!lazy.is_loaded());

        let val = lazy.get();
        assert_eq!(*val, "computed");
        assert_eq!(*counter.borrow(), 1);

        // Second access shouldn't recompute
        let val2 = lazy.get();
        assert_eq!(*val2, "computed");
        assert_eq!(*counter.borrow(), 1);
    }

    #[test]
    fn test_memory_manager() {
        let manager = BuildMemoryManager::new();
        let stats = manager.memory_usage();

        // Should start with some capacity but no allocation
        assert_eq!(stats.arena_allocated, 0);
        assert!(stats.arena_capacity > 0);
    }
}