ddex_builder/
memory_optimization.rs

1//! Memory optimization techniques for DDEX Builder
2//! 
3//! This module provides arena allocation, object pooling, and memory-efficient
4//! data structures to minimize memory usage and improve performance.
5
6use std::cell::RefCell;
7use std::collections::VecDeque;
8use std::mem;
9use indexmap::IndexMap;
10
/// Allocation tracker for temporary objects created during the build process.
///
/// Values are boxed individually (see [`Arena::alloc`]); the arena owns the
/// reserved chunk storage and the byte accounting reported by
/// [`Arena::allocated_bytes`] and [`Arena::capacity_bytes`].
pub struct Arena {
    /// Reserved backing chunks.
    chunks: RefCell<Vec<Vec<u8>>>,
    /// Index of the active chunk; zeroed by `reset` and `clear`.
    current_chunk: RefCell<usize>,
    /// Offset inside the active chunk; zeroed by `reset` and `clear`.
    current_offset: RefCell<usize>,
    /// Capacity requested for the initial chunk.
    chunk_size: usize,
    /// Sum of `size_of::<T>()` over every value handed out since the last
    /// `reset`/`clear`.
    allocated: RefCell<usize>,
}

impl Arena {
    /// Create a new arena whose first chunk reserves `chunk_size` bytes.
    pub fn new(chunk_size: usize) -> Self {
        Self {
            chunks: RefCell::new(vec![Vec::with_capacity(chunk_size)]),
            current_chunk: RefCell::new(0),
            current_offset: RefCell::new(0),
            chunk_size,
            allocated: RefCell::new(0),
        }
    }

    /// Allocate `value` and record its size in the arena's accounting.
    ///
    /// The value is stored in a safe [`Box`] rather than inside a chunk (no
    /// raw pointers), so only its shallow size, `size_of::<T>()`, is added to
    /// [`Arena::allocated_bytes`]. Heap data owned by the value itself (for
    /// example a `String`'s buffer) is not counted.
    pub fn alloc<T>(&self, value: T) -> Box<T> {
        {
            // Scoped so the mutable borrow ends before the value is boxed.
            let mut total = self.allocated.borrow_mut();
            *total = total.saturating_add(mem::size_of::<T>());
        }
        Box::new(value)
    }

    /// Get total allocated memory: bytes held in the chunks plus the bytes
    /// recorded by [`Arena::alloc`] since the last `reset`/`clear`.
    pub fn allocated_bytes(&self) -> usize {
        let in_chunks: usize = self.chunks.borrow().iter().map(Vec::len).sum();
        in_chunks.saturating_add(*self.allocated.borrow())
    }

    /// Get total capacity reserved across all chunks.
    pub fn capacity_bytes(&self) -> usize {
        self.chunks.borrow().iter().map(Vec::capacity).sum()
    }

    /// Reset arena for reuse: chunks are emptied but keep their capacity, and
    /// all accounting goes back to zero.
    pub fn reset(&self) {
        for chunk in self.chunks.borrow_mut().iter_mut() {
            chunk.clear();
        }
        self.rewind();
    }

    /// Clear all chunks and free their memory; accounting goes back to zero.
    ///
    /// After this call [`Arena::capacity_bytes`] reports `0`.
    pub fn clear(&self) {
        self.chunks.borrow_mut().clear();
        self.rewind();
    }

    /// Zero the chunk cursor and the allocation counter.
    fn rewind(&self) {
        *self.current_chunk.borrow_mut() = 0;
        *self.current_offset.borrow_mut() = 0;
        *self.allocated.borrow_mut() = 0;
    }
}
68
/// Object pool for frequently created/destroyed types.
///
/// Idle objects wait in a FIFO queue holding at most `max_size` entries. When
/// the queue is empty, `factory` builds a fresh object. Objects are handed out
/// exactly as they were returned: the pool does not reset their contents.
pub struct ObjectPool<T> {
    /// Idle objects available for reuse.
    objects: RefCell<VecDeque<T>>,
    /// Builds a new object when no idle one is available.
    factory: Box<dyn Fn() -> T>,
    /// Maximum number of idle objects retained.
    max_size: usize,
}

impl<T> ObjectPool<T> {
    /// Create a new, empty object pool.
    ///
    /// `factory` is called whenever [`ObjectPool::get`] finds no idle object;
    /// `max_size` caps how many returned objects are kept for reuse.
    pub fn new<F>(factory: F, max_size: usize) -> Self
    where
        F: Fn() -> T + 'static,
    {
        Self {
            objects: RefCell::new(VecDeque::new()),
            factory: Box::new(factory),
            max_size,
        }
    }

    /// Get an object from the pool (or create a new one).
    ///
    /// The returned guard hands the object back to the pool when dropped.
    pub fn get(&self) -> PooledObject<T> {
        // End the queue borrow before running the factory: a factory that
        // inspects this pool must not meet an outstanding mutable borrow.
        let recycled = self.objects.borrow_mut().pop_front();
        let obj = recycled.unwrap_or_else(|| (self.factory)());

        PooledObject {
            object: Some(obj),
            pool: self,
        }
    }

    /// Return an object to the pool; it is dropped instead when the pool
    /// already holds `max_size` idle objects.
    fn return_object(&self, obj: T) {
        let mut objects = self.objects.borrow_mut();
        if objects.len() < self.max_size {
            objects.push_back(obj);
        }
        // If pool is full, `obj` is dropped when this function returns.
    }

    /// Get the number of idle objects currently held by the pool.
    pub fn size(&self) -> usize {
        self.objects.borrow().len()
    }

    /// Drop every idle object held by the pool.
    pub fn clear(&self) {
        self.objects.borrow_mut().clear();
    }
}

/// Guard around an object checked out of an [`ObjectPool`]; dropping the guard
/// returns the object to its pool.
pub struct PooledObject<'a, T> {
    /// The checked-out object; `None` only once `drop` has taken it.
    object: Option<T>,
    /// Pool the object goes back to.
    pool: &'a ObjectPool<T>,
}

impl<'a, T> PooledObject<'a, T> {
    /// Get mutable reference to the pooled object.
    pub fn get_mut(&mut self) -> &mut T {
        self.object
            .as_mut()
            .expect("pooled object is present until the guard is dropped")
    }

    /// Get immutable reference to the pooled object.
    pub fn get(&self) -> &T {
        self.object
            .as_ref()
            .expect("pooled object is present until the guard is dropped")
    }
}

impl<'a, T> Drop for PooledObject<'a, T> {
    /// Hand the object back to the pool it came from.
    fn drop(&mut self) {
        if let Some(obj) = self.object.take() {
            self.pool.return_object(obj);
        }
    }
}
146
/// Memory-lean form of a DDEX element: every string is referenced by index
/// instead of being stored inline.
#[derive(Debug, Clone)]
pub struct CompactElement {
    /// Index of the element name in the string table
    name_idx: u32,
    /// Index of the namespace in the string table, when the element has one
    namespace_idx: Option<u32>,
    /// Packed attribute pairs
    attributes: CompactAttributes,
    /// References to child nodes
    children: Vec<CompactNodeRef>,
}

/// Reference to a child node, tagged by the kind of node it points at
#[derive(Debug, Clone)]
pub enum CompactNodeRef {
    /// Index in element table
    Element(u32),
    /// Index in string table
    Text(u32),
    /// Index in string table
    Comment(u32),
}

/// Attribute storage kept as a flat list of index pairs
#[derive(Debug, Clone, Default)]
pub struct CompactAttributes {
    /// `(key_idx, value_idx)` pairs in insertion order
    data: Vec<(u32, u32)>,
}

impl CompactAttributes {
    /// Append an attribute pair; existing pairs with the same key are kept
    pub fn insert(&mut self, key_idx: u32, value_idx: u32) {
        let pair = (key_idx, value_idx);
        self.data.push(pair);
    }

    /// Number of stored attribute pairs
    pub fn len(&self) -> usize {
        let Self { data } = self;
        data.len()
    }

    /// Whether no attribute has been stored
    pub fn is_empty(&self) -> bool {
        let Self { data } = self;
        data.is_empty()
    }

    /// Iterate over the `(key_idx, value_idx)` pairs in insertion order
    pub fn iter(&self) -> impl Iterator<Item = (u32, u32)> + '_ {
        self.data
            .iter()
            .map(|&(key_idx, value_idx)| (key_idx, value_idx))
    }
}
196
197/// Compact AST representation for memory efficiency
198#[derive(Debug)]
199pub struct CompactAST {
200    /// String table for all text content
201    strings: Vec<String>,
202    /// String lookup map
203    string_map: IndexMap<String, u32>,
204    /// Element table
205    elements: Vec<CompactElement>,
206    /// Root element index
207    root_idx: u32,
208    /// Namespace table
209    namespaces: Vec<(u32, u32)>, // (prefix_idx, uri_idx) pairs
210    /// Schema location index
211    schema_location_idx: Option<u32>,
212}
213
214impl CompactAST {
215    /// Create a new compact AST
216    pub fn new() -> Self {
217        Self {
218            strings: Vec::new(),
219            string_map: IndexMap::new(),
220            elements: Vec::new(),
221            root_idx: 0,
222            namespaces: Vec::new(),
223            schema_location_idx: None,
224        }
225    }
226    
227    /// Intern a string and return its index
228    pub fn intern_string(&mut self, s: &str) -> u32 {
229        if let Some(&idx) = self.string_map.get(s) {
230            return idx;
231        }
232        
233        let idx = self.strings.len() as u32;
234        self.strings.push(s.to_string());
235        self.string_map.insert(s.to_string(), idx);
236        idx
237    }
238    
239    /// Get string by index
240    pub fn get_string(&self, idx: u32) -> Option<&str> {
241        self.strings.get(idx as usize).map(|s| s.as_str())
242    }
243    
244    /// Add an element and return its index
245    pub fn add_element(&mut self, element: CompactElement) -> u32 {
246        let idx = self.elements.len() as u32;
247        self.elements.push(element);
248        idx
249    }
250    
251    /// Get element by index
252    pub fn get_element(&self, idx: u32) -> Option<&CompactElement> {
253        self.elements.get(idx as usize)
254    }
255    
256    /// Calculate memory footprint
257    pub fn memory_footprint(&self) -> usize {
258        let strings_size = self.strings.iter()
259            .map(|s| s.len())
260            .sum::<usize>();
261            
262        let map_size = self.string_map.len() * 
263            (mem::size_of::<String>() + mem::size_of::<u32>());
264            
265        let elements_size = self.elements.len() * mem::size_of::<CompactElement>();
266        
267        strings_size + map_size + elements_size
268    }
269    
270    /// Convert from regular AST (memory optimization pass)
271    pub fn from_ast(ast: &crate::ast::AST) -> Self {
272        let mut compact = CompactAST::new();
273        
274        // Intern namespace strings
275        for (prefix, uri) in &ast.namespaces {
276            let prefix_idx = compact.intern_string(prefix);
277            let uri_idx = compact.intern_string(uri);
278            compact.namespaces.push((prefix_idx, uri_idx));
279        }
280        
281        // Intern schema location if present
282        if let Some(ref location) = ast.schema_location {
283            compact.schema_location_idx = Some(compact.intern_string(location));
284        }
285        
286        // Convert root element
287        compact.root_idx = compact.convert_element(&ast.root);
288        
289        compact
290    }
291    
292    /// Convert an element to compact format
293    fn convert_element(&mut self, element: &crate::ast::Element) -> u32 {
294        let name_idx = self.intern_string(&element.name);
295        let namespace_idx = element.namespace.as_ref()
296            .map(|ns| self.intern_string(ns));
297        
298        // Convert attributes
299        let mut attributes = CompactAttributes::default();
300        for (key, value) in &element.attributes {
301            let key_idx = self.intern_string(key);
302            let value_idx = self.intern_string(value);
303            attributes.insert(key_idx, value_idx);
304        }
305        
306        // Convert children (placeholder - would need full recursive conversion)
307        let children = Vec::new(); // Simplified for now
308        
309        let compact_element = CompactElement {
310            name_idx,
311            namespace_idx,
312            attributes,
313            children,
314        };
315        
316        self.add_element(compact_element)
317    }
318}
319
/// Field whose value is produced by a loader the first time it is requested
pub struct LazyField<T> {
    /// Cached value; `None` until loaded (and again after `clear`)
    value: RefCell<Option<T>>,
    /// Produces the value on demand
    loader: Box<dyn Fn() -> T>,
}

impl<T: std::fmt::Debug> std::fmt::Debug for LazyField<T> {
    /// Shows the cached value; the loader is printed as a fixed placeholder
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("LazyField");
        out.field("value", &self.value);
        out.field("loader", &"<function>");
        out.finish()
    }
}

impl<T> LazyField<T> {
    /// Build an unloaded field around `loader`
    pub fn new<F>(loader: F) -> Self
    where
        F: Fn() -> T + 'static,
    {
        let value = RefCell::new(None);
        let loader: Box<dyn Fn() -> T> = Box::new(loader);
        Self { value, loader }
    }

    /// Borrow the value, running the loader first when nothing is cached
    pub fn get(&self) -> std::cell::Ref<T> {
        // Check under a short shared borrow so earlier `Ref`s may stay alive.
        let needs_load = self.value.borrow().is_none();
        if needs_load {
            // The loader runs before the cell is borrowed mutably.
            let fresh = (self.loader)();
            *self.value.borrow_mut() = Some(fresh);
        }

        let slot = self.value.borrow();
        std::cell::Ref::map(slot, |cached| cached.as_ref().unwrap())
    }

    /// Whether a value is currently cached
    pub fn is_loaded(&self) -> bool {
        let slot = self.value.borrow();
        slot.is_some()
    }

    /// Drop the cached value so the next `get` loads again
    pub fn clear(&self) {
        let mut slot = self.value.borrow_mut();
        *slot = None;
    }
}
366
367/// Memory manager for the entire build process
368pub struct BuildMemoryManager {
369    /// Arena for temporary allocations
370    pub arena: Arena,
371    /// Element pool
372    pub element_pool: ObjectPool<crate::ast::Element>,
373    /// String pool for small strings
374    pub small_string_pool: ObjectPool<String>,
375    /// Large buffer pool for XML generation
376    pub buffer_pool: ObjectPool<Vec<u8>>,
377}
378
379impl BuildMemoryManager {
380    /// Create a new memory manager optimized for typical DDEX builds
381    pub fn new() -> Self {
382        Self {
383            arena: Arena::new(64 * 1024), // 64KB chunks
384            element_pool: ObjectPool::new(
385                || crate::ast::Element::new(""), 
386                100 // Keep up to 100 elements
387            ),
388            small_string_pool: ObjectPool::new(
389                || String::with_capacity(64),
390                50 // Keep up to 50 small strings
391            ),
392            buffer_pool: ObjectPool::new(
393                || Vec::with_capacity(8192), // 8KB buffers
394                10 // Keep up to 10 buffers
395            ),
396        }
397    }
398    
399    /// Get memory usage statistics
400    pub fn memory_usage(&self) -> MemoryStats {
401        MemoryStats {
402            arena_allocated: self.arena.allocated_bytes(),
403            arena_capacity: self.arena.capacity_bytes(),
404            element_pool_size: self.element_pool.size(),
405            string_pool_size: self.small_string_pool.size(),
406            buffer_pool_size: self.buffer_pool.size(),
407        }
408    }
409    
410    /// Reset manager for next build (keeps pools)
411    pub fn reset_for_next_build(&self) {
412        self.arena.reset();
413        // Pools reset automatically when objects are returned
414    }
415    
416    /// Full reset including all pools
417    pub fn full_reset(&self) {
418        self.arena.clear();
419        self.element_pool.clear();
420        self.small_string_pool.clear();
421        self.buffer_pool.clear();
422    }
423}
424
425impl Default for BuildMemoryManager {
426    fn default() -> Self {
427        Self::new()
428    }
429}
430
431/// Memory usage statistics
432#[derive(Debug, Clone)]
433pub struct MemoryStats {
434    pub arena_allocated: usize,
435    pub arena_capacity: usize,
436    pub element_pool_size: usize,
437    pub string_pool_size: usize,
438    pub buffer_pool_size: usize,
439}
440
441impl MemoryStats {
442    /// Get total memory usage estimate
443    pub fn total_bytes(&self) -> usize {
444        self.arena_capacity + 
445        (self.element_pool_size * mem::size_of::<crate::ast::Element>()) +
446        (self.string_pool_size * 64) + // Estimated string size
447        (self.buffer_pool_size * 8192) // Buffer size
448    }
449}
450
#[cfg(test)]
mod tests {
    use super::*;

    /// Values come back intact and the arena reports a non-zero allocation.
    #[test]
    fn test_arena_allocation() {
        let arena = Arena::new(1024);

        let val1 = arena.alloc(42u32);
        let val2 = arena.alloc("hello world".to_string());

        assert_eq!(*val1, 42);
        assert_eq!(*val2, "hello world");

        assert!(arena.allocated_bytes() > 0);
    }

    /// Checked-out objects leave the pool and return when their guard drops.
    #[test]
    fn test_object_pool() {
        let pool = ObjectPool::new(|| String::with_capacity(32), 5);

        {
            let mut obj1 = pool.get();
            obj1.get_mut().push_str("test");
            assert_eq!(obj1.get(), "test");

            {
                let _obj2 = pool.get();
                assert_eq!(pool.size(), 0); // Both objects checked out
            }
            // obj2 returned to pool
            assert_eq!(pool.size(), 1);
        }
        // obj1 returned to pool

        assert_eq!(pool.size(), 2);
    }

    /// Interning the same text twice yields the same index.
    #[test]
    fn test_compact_ast() {
        let mut compact = CompactAST::new();

        let hello_idx = compact.intern_string("hello");
        let hello_idx2 = compact.intern_string("hello"); // Should reuse
        let world_idx = compact.intern_string("world");

        assert_eq!(hello_idx, hello_idx2);
        assert_ne!(hello_idx, world_idx);
        assert_eq!(compact.get_string(hello_idx), Some("hello"));
        assert_eq!(compact.get_string(world_idx), Some("world"));
    }

    /// The loader runs on first access only.
    #[test]
    fn test_lazy_field() {
        // Shared counter so the test can observe how often the loader ran.
        let loads = std::rc::Rc::new(std::cell::Cell::new(0u32));
        let loads_in_loader = std::rc::Rc::clone(&loads);
        let lazy = LazyField::new(move || {
            loads_in_loader.set(loads_in_loader.get() + 1);
            "computed".to_string()
        });

        assert!(!lazy.is_loaded());
        assert_eq!(loads.get(), 0);

        let val = lazy.get();
        assert_eq!(*val, "computed");
        assert!(lazy.is_loaded());

        // Second access shouldn't recompute
        let val2 = lazy.get();
        assert_eq!(*val2, "computed");
        assert_eq!(loads.get(), 1);
    }

    /// A fresh manager has reserved capacity but nothing allocated.
    #[test]
    fn test_memory_manager() {
        let manager = BuildMemoryManager::new();
        let stats = manager.memory_usage();

        // Should start with some capacity but no allocation
        assert_eq!(stats.arena_allocated, 0);
        assert!(stats.arena_capacity > 0);
    }
}