ddex_builder/
memory_optimization.rs

1//! Memory optimization techniques for DDEX Builder
2//!
3//! This module provides arena allocation, object pooling, and memory-efficient
4//! data structures to minimize memory usage and improve performance.
5
6use indexmap::IndexMap;
7use std::cell::RefCell;
8use std::collections::VecDeque;
9use std::mem;
10
/// Arena-style allocation tracker for temporary objects during the build process.
///
/// Values returned by [`Arena::alloc`] are individually boxed; the arena's chunks do
/// not back those values. Each allocation is instead mirrored as zeroed bytes appended
/// to the current chunk, so that [`Arena::allocated_bytes`] and
/// [`Arena::capacity_bytes`] can report usage.
pub struct Arena {
    /// Bookkeeping chunks; a chunk's length is the number of bytes recorded in it.
    chunks: RefCell<Vec<Vec<u8>>>,
    /// Index of the chunk that receives the next recorded allocation.
    current_chunk: RefCell<usize>,
    /// Number of bytes recorded so far in the current chunk.
    current_offset: RefCell<usize>,
    /// Target size of every chunk, in bytes.
    chunk_size: usize,
}

impl Arena {
    /// Create a new arena with the specified chunk size (in bytes).
    ///
    /// One empty chunk with `chunk_size` bytes of capacity is reserved up front.
    pub fn new(chunk_size: usize) -> Self {
        Self {
            chunks: RefCell::new(vec![Vec::with_capacity(chunk_size)]),
            current_chunk: RefCell::new(0),
            current_offset: RefCell::new(0),
            chunk_size,
        }
    }

    /// Box `value` and record `size_of::<T>()` bytes against the arena (safe version
    /// using `Box`).
    ///
    /// The record goes into the current chunk when it still has room. Otherwise the
    /// arena moves on to the next chunk, reusing one kept by [`Arena::reset`] before
    /// reserving a new one. An allocation larger than `chunk_size` is recorded in an
    /// empty chunk of its own.
    pub fn alloc<T>(&self, value: T) -> Box<T> {
        // For security audit compliance, use safe Box allocation instead of raw pointers.
        // The chunks only track the allocation size for statistics.
        let size = std::mem::size_of::<T>();

        {
            let mut chunks = self.chunks.borrow_mut();
            let mut current = *self.current_chunk.borrow();

            if chunks.is_empty() {
                // `clear()` dropped every chunk; start over with a fresh one.
                chunks.push(Vec::with_capacity(self.chunk_size));
                current = 0;
            } else if current >= chunks.len() {
                current = chunks.len() - 1;
            }

            // Decide based on the chunk we actually write to. Checking the last chunk
            // instead lets chunk 0 grow without bound after a `reset()`.
            let used = chunks[current].len();
            let fits = used == 0
                || matches!(used.checked_add(size), Some(end) if end <= self.chunk_size);
            if !fits {
                // Chunks past the current one are always empty, so one step suffices.
                current += 1;
                if current == chunks.len() {
                    chunks.push(Vec::with_capacity(self.chunk_size));
                }
            }

            // Simulate the allocation by growing the chunk's length.
            let chunk = &mut chunks[current];
            let new_len = chunk.len() + size;
            chunk.resize(new_len, 0);

            *self.current_chunk.borrow_mut() = current;
            *self.current_offset.borrow_mut() = new_len;
        }

        Box::new(value)
    }

    /// Get the total number of bytes recorded across all chunks.
    pub fn allocated_bytes(&self) -> usize {
        self.chunks.borrow().iter().map(|chunk| chunk.len()).sum()
    }

    /// Get the total capacity reserved by all chunks.
    pub fn capacity_bytes(&self) -> usize {
        self.chunks
            .borrow()
            .iter()
            .map(|chunk| chunk.capacity())
            .sum()
    }

    /// Reset the arena for reuse: every chunk is emptied but keeps its capacity.
    ///
    /// Boxes previously returned by [`Arena::alloc`] are unaffected.
    pub fn reset(&self) {
        let mut chunks = self.chunks.borrow_mut();
        for chunk in chunks.iter_mut() {
            chunk.clear();
        }
        *self.current_chunk.borrow_mut() = 0;
        *self.current_offset.borrow_mut() = 0;
    }

    /// Drop all chunks and free their memory.
    ///
    /// Boxes previously returned by [`Arena::alloc`] are unaffected.
    pub fn clear(&self) {
        self.chunks.borrow_mut().clear();
        *self.current_chunk.borrow_mut() = 0;
        *self.current_offset.borrow_mut() = 0;
    }
}
88
/// Object pool for frequently created/destroyed types.
///
/// Objects are handed out oldest-first and are returned to the pool exactly as the
/// borrower left them; the pool does not reset their contents.
pub struct ObjectPool<T> {
    /// Idle objects waiting to be handed out again.
    objects: RefCell<VecDeque<T>>,
    /// Builds a new object when the pool has no idle one.
    factory: Box<dyn Fn() -> T>,
    /// Maximum number of idle objects kept; extra returned objects are dropped.
    max_size: usize,
}

impl<T> ObjectPool<T> {
    /// Create a new, initially empty object pool.
    ///
    /// `factory` is called whenever [`ObjectPool::get`] finds no idle object.
    /// `max_size` caps how many idle objects are retained.
    pub fn new<F>(factory: F, max_size: usize) -> Self
    where
        F: Fn() -> T + 'static,
    {
        Self {
            objects: RefCell::new(VecDeque::new()),
            factory: Box::new(factory),
            max_size,
        }
    }

    /// Get an object from the pool (or create a new one).
    ///
    /// The returned guard gives the object back to the pool when dropped.
    pub fn get(&self) -> PooledObject<'_, T> {
        // Finish the pop in its own statement so the `RefCell` borrow is released
        // before the factory runs; a factory that touches this pool would otherwise
        // panic on a double borrow.
        let recycled = self.objects.borrow_mut().pop_front();
        let obj = recycled.unwrap_or_else(|| (self.factory)());

        PooledObject {
            object: Some(obj),
            pool: self,
        }
    }

    /// Return an object to the pool; it is dropped instead if the pool is full.
    fn return_object(&self, obj: T) {
        let mut objects = self.objects.borrow_mut();
        if objects.len() < self.max_size {
            objects.push_back(obj);
        }
        // If pool is full, drop the object
    }

    /// Get the number of idle objects currently held by the pool.
    pub fn size(&self) -> usize {
        self.objects.borrow().len()
    }

    /// Drop every idle object held by the pool.
    pub fn clear(&self) {
        // Move the objects out first so their destructors run with no borrow held.
        let drained = std::mem::take(&mut *self.objects.borrow_mut());
        drop(drained);
    }
}

/// RAII wrapper for pooled objects
pub struct PooledObject<'a, T> {
    /// The checked-out object; only `None` once `drop` has taken it.
    object: Option<T>,
    /// Pool the object is returned to on drop.
    pool: &'a ObjectPool<T>,
}

impl<'a, T> PooledObject<'a, T> {
    /// Get mutable reference to the pooled object
    pub fn get_mut(&mut self) -> &mut T {
        self.object
            .as_mut()
            .expect("pooled object is present until the guard is dropped")
    }

    /// Get immutable reference to the pooled object
    pub fn get(&self) -> &T {
        self.object
            .as_ref()
            .expect("pooled object is present until the guard is dropped")
    }
}

impl<'a, T> Drop for PooledObject<'a, T> {
    /// Hand the object back to the pool it came from.
    fn drop(&mut self) {
        if let Some(obj) = self.object.take() {
            self.pool.return_object(obj);
        }
    }
}
168
/// Compact representation for DDEX elements to reduce memory usage.
///
/// Instead of owning its strings, an element stores `u32` indices: the name,
/// namespace and attribute keys/values are produced by `CompactAST::intern_string`
/// when an element is converted.
#[derive(Debug, Clone)]
// NOTE(review): presumably kept because the fields are written during conversion but
// no accessor in this module reads them yet — confirm before removing.
#[allow(dead_code)]
pub struct CompactElement {
    /// Name index in string table
    name_idx: u32,
    /// Namespace index (optional); `None` when the source element has no namespace
    namespace_idx: Option<u32>,
    /// Attributes (packed) as (key index, value index) pairs
    attributes: CompactAttributes,
    /// Children indices, one `CompactNodeRef` per child node
    children: Vec<CompactNodeRef>,
}
182
/// Node type in the AST.
///
/// Every variant carries only a `u32` table index, so the type is `Copy` and can be
/// compared and hashed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NodeType {
    /// XML element node with index in element table
    Element(u32),
    /// Text node with index in string table
    Text(u32),
    /// Comment node with index in string table
    Comment(u32),
}
193
/// Node reference in the compact representation.
///
/// Every variant carries only a `u32` table index, so the type is `Copy` and can be
/// compared and hashed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CompactNodeRef {
    /// Element node with index in element table
    Element(u32),
    /// Text node with index in string table
    Text(u32),
    /// Comment node with index in string table
    Comment(u32),
}
204
/// Attribute list stored as index pairs instead of owned strings.
#[derive(Debug, Clone, Default)]
pub struct CompactAttributes {
    /// One `(key_idx, value_idx)` entry per attribute, in insertion order.
    data: Vec<(u32, u32)>,
}

impl CompactAttributes {
    /// Append an attribute given the indices of its key and value.
    ///
    /// Entries are appended as-is; an existing entry with the same key is kept.
    pub fn insert(&mut self, key_idx: u32, value_idx: u32) {
        let entry = (key_idx, value_idx);
        self.data.push(entry);
    }

    /// Number of stored attribute entries.
    pub fn len(&self) -> usize {
        let entries = &self.data;
        entries.len()
    }

    /// `true` when no attribute has been stored.
    pub fn is_empty(&self) -> bool {
        let entries = &self.data;
        entries.is_empty()
    }

    /// Iterate over the `(key_idx, value_idx)` entries in insertion order.
    pub fn iter(&self) -> impl Iterator<Item = (u32, u32)> + '_ {
        self.data
            .iter()
            .map(|&(key_idx, value_idx)| (key_idx, value_idx))
    }
}
233
234/// Compact AST representation for memory efficiency
235#[derive(Debug)]
236pub struct CompactAST {
237    /// String table for all text content
238    strings: Vec<String>,
239    /// String lookup map
240    string_map: IndexMap<String, u32>,
241    /// Element table
242    elements: Vec<CompactElement>,
243    /// Root element index
244    root_idx: u32,
245    /// Namespace table
246    namespaces: Vec<(u32, u32)>, // (prefix_idx, uri_idx) pairs
247    /// Schema location index
248    schema_location_idx: Option<u32>,
249}
250
251impl CompactAST {
252    /// Create a new compact AST
253    pub fn new() -> Self {
254        Self {
255            strings: Vec::new(),
256            string_map: IndexMap::new(),
257            elements: Vec::new(),
258            root_idx: 0,
259            namespaces: Vec::new(),
260            schema_location_idx: None,
261        }
262    }
263
264    /// Intern a string and return its index
265    pub fn intern_string(&mut self, s: &str) -> u32 {
266        if let Some(&idx) = self.string_map.get(s) {
267            return idx;
268        }
269
270        let idx = self.strings.len() as u32;
271        self.strings.push(s.to_string());
272        self.string_map.insert(s.to_string(), idx);
273        idx
274    }
275
276    /// Get string by index
277    pub fn get_string(&self, idx: u32) -> Option<&str> {
278        self.strings.get(idx as usize).map(|s| s.as_str())
279    }
280
281    /// Add an element and return its index
282    pub fn add_element(&mut self, element: CompactElement) -> u32 {
283        let idx = self.elements.len() as u32;
284        self.elements.push(element);
285        idx
286    }
287
288    /// Get element by index
289    pub fn get_element(&self, idx: u32) -> Option<&CompactElement> {
290        self.elements.get(idx as usize)
291    }
292
293    /// Calculate memory footprint
294    pub fn memory_footprint(&self) -> usize {
295        let strings_size = self.strings.iter().map(|s| s.len()).sum::<usize>();
296
297        let map_size = self.string_map.len() * (mem::size_of::<String>() + mem::size_of::<u32>());
298
299        let elements_size = self.elements.len() * mem::size_of::<CompactElement>();
300
301        strings_size + map_size + elements_size
302    }
303
304    /// Convert from regular AST (memory optimization pass)
305    pub fn from_ast(ast: &crate::ast::AST) -> Self {
306        let mut compact = CompactAST::new();
307
308        // Intern namespace strings
309        for (prefix, uri) in &ast.namespaces {
310            let prefix_idx = compact.intern_string(prefix);
311            let uri_idx = compact.intern_string(uri);
312            compact.namespaces.push((prefix_idx, uri_idx));
313        }
314
315        // Intern schema location if present
316        if let Some(ref location) = ast.schema_location {
317            compact.schema_location_idx = Some(compact.intern_string(location));
318        }
319
320        // Convert root element
321        compact.root_idx = compact.convert_element(&ast.root);
322
323        compact
324    }
325
326    /// Convert an element to compact format
327    fn convert_element(&mut self, element: &crate::ast::Element) -> u32 {
328        let name_idx = self.intern_string(&element.name);
329        let namespace_idx = element.namespace.as_ref().map(|ns| self.intern_string(ns));
330
331        // Convert attributes
332        let mut attributes = CompactAttributes::default();
333        for (key, value) in &element.attributes {
334            let key_idx = self.intern_string(key);
335            let value_idx = self.intern_string(value);
336            attributes.insert(key_idx, value_idx);
337        }
338
339        // Convert children (placeholder - would need full recursive conversion)
340        let children = Vec::new(); // Simplified for now
341
342        let compact_element = CompactElement {
343            name_idx,
344            namespace_idx,
345            attributes,
346            children,
347        };
348
349        self.add_element(compact_element)
350    }
351}
352
/// Holder for an optional field whose value is computed on first access.
pub struct LazyField<T> {
    /// Cached value; `None` until the first `get` (or after `clear`).
    value: RefCell<Option<T>>,
    /// Produces the value when the cache is empty.
    loader: Box<dyn Fn() -> T>,
}

impl<T: std::fmt::Debug> std::fmt::Debug for LazyField<T> {
    /// Prints the cached value and a fixed placeholder for the loader closure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("LazyField");
        out.field("value", &self.value);
        out.field("loader", &"<function>");
        out.finish()
    }
}

impl<T> LazyField<T> {
    /// Build a lazy field around `loader`; nothing is computed yet.
    pub fn new<F>(loader: F) -> Self
    where
        F: Fn() -> T + 'static,
    {
        let value = RefCell::new(None);
        let loader: Box<dyn Fn() -> T> = Box::new(loader);
        Self { value, loader }
    }

    /// Borrow the value, running the loader first if nothing is cached.
    ///
    /// The returned guard is a shared `RefCell` borrow; `clear` panics while one is
    /// alive.
    pub fn get(&self) -> std::cell::Ref<'_, T> {
        let missing = self.value.borrow().is_none();
        if missing {
            // The loader runs before the cell is mutably borrowed.
            let loaded = (self.loader)();
            *self.value.borrow_mut() = Some(loaded);
        }

        let cached = self.value.borrow();
        std::cell::Ref::map(cached, |slot| slot.as_ref().unwrap())
    }

    /// Report whether a value is currently cached.
    pub fn is_loaded(&self) -> bool {
        let cached = self.value.borrow();
        cached.is_some()
    }

    /// Discard the cached value so the next `get` runs the loader again.
    pub fn clear(&self) {
        let mut cached = self.value.borrow_mut();
        *cached = None;
    }
}
399
400/// Memory manager for the entire build process
401pub struct BuildMemoryManager {
402    /// Arena for temporary allocations
403    pub arena: Arena,
404    /// Element pool
405    pub element_pool: ObjectPool<crate::ast::Element>,
406    /// String pool for small strings
407    pub small_string_pool: ObjectPool<String>,
408    /// Large buffer pool for XML generation
409    pub buffer_pool: ObjectPool<Vec<u8>>,
410}
411
412impl BuildMemoryManager {
413    /// Create a new memory manager optimized for typical DDEX builds
414    pub fn new() -> Self {
415        Self {
416            arena: Arena::new(64 * 1024), // 64KB chunks
417            element_pool: ObjectPool::new(
418                || crate::ast::Element::new(""),
419                100, // Keep up to 100 elements
420            ),
421            small_string_pool: ObjectPool::new(
422                || String::with_capacity(64),
423                50, // Keep up to 50 small strings
424            ),
425            buffer_pool: ObjectPool::new(
426                || Vec::with_capacity(8192), // 8KB buffers
427                10,                          // Keep up to 10 buffers
428            ),
429        }
430    }
431
432    /// Get memory usage statistics
433    pub fn memory_usage(&self) -> MemoryStats {
434        MemoryStats {
435            arena_allocated: self.arena.allocated_bytes(),
436            arena_capacity: self.arena.capacity_bytes(),
437            element_pool_size: self.element_pool.size(),
438            string_pool_size: self.small_string_pool.size(),
439            buffer_pool_size: self.buffer_pool.size(),
440        }
441    }
442
443    /// Reset manager for next build (keeps pools)
444    pub fn reset_for_next_build(&self) {
445        self.arena.reset();
446        // Pools reset automatically when objects are returned
447    }
448
449    /// Full reset including all pools
450    pub fn full_reset(&self) {
451        self.arena.clear();
452        self.element_pool.clear();
453        self.small_string_pool.clear();
454        self.buffer_pool.clear();
455    }
456}
457
458impl Default for BuildMemoryManager {
459    fn default() -> Self {
460        Self::new()
461    }
462}
463
464/// Memory statistics
465#[derive(Debug, Default)]
466pub struct MemoryStats {
467    /// Bytes allocated in arena
468    pub arena_allocated: usize,
469    /// Total arena capacity
470    pub arena_capacity: usize,
471    /// Size of element pool
472    pub element_pool_size: usize,
473    /// Size of string pool
474    pub string_pool_size: usize,
475    /// Size of buffer pool
476    pub buffer_pool_size: usize,
477}
478
479impl MemoryStats {
480    /// Get total memory usage estimate
481    pub fn total_bytes(&self) -> usize {
482        self.arena_capacity +
483        (self.element_pool_size * mem::size_of::<crate::ast::Element>()) +
484        (self.string_pool_size * 64) + // Estimated string size
485        (self.buffer_pool_size * 8192) // Buffer size
486    }
487}
488
#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::Cell;
    use std::rc::Rc;

    /// Allocated values are readable and the arena records a non-zero size.
    #[test]
    fn test_arena_allocation() {
        let arena = Arena::new(1024);

        let val1 = arena.alloc(42u32);
        let val2 = arena.alloc("hello world".to_string());

        assert_eq!(*val1, 42);
        assert_eq!(*val2, "hello world");

        assert!(arena.allocated_bytes() > 0);
    }

    /// Checked-out objects leave the pool and come back when their guards drop.
    #[test]
    fn test_object_pool() {
        let pool = ObjectPool::new(|| String::with_capacity(32), 5);

        {
            let mut obj1 = pool.get();
            obj1.get_mut().push_str("test");
            assert_eq!(obj1.get(), "test");

            {
                let _obj2 = pool.get();
                assert_eq!(pool.size(), 0); // Both objects checked out
            }
            // obj2 returned to pool
        }
        // obj1 returned to pool

        assert_eq!(pool.size(), 2);
    }

    /// Interning the same text twice yields the same index.
    #[test]
    fn test_compact_ast() {
        let mut compact = CompactAST::new();

        let hello_idx = compact.intern_string("hello");
        let hello_idx2 = compact.intern_string("hello"); // Should reuse
        let world_idx = compact.intern_string("world");

        assert_eq!(hello_idx, hello_idx2);
        assert_ne!(hello_idx, world_idx);
        assert_eq!(compact.get_string(hello_idx), Some("hello"));
        assert_eq!(compact.get_string(world_idx), Some("world"));
    }

    /// The loader runs on first access only; later accesses reuse the cached value.
    #[test]
    fn test_lazy_field() {
        // Shared counter so the test can observe how often the loader ran.
        let loads = Rc::new(Cell::new(0u32));
        let loads_in_loader = Rc::clone(&loads);
        let lazy = LazyField::new(move || {
            loads_in_loader.set(loads_in_loader.get() + 1);
            "computed".to_string()
        });

        assert!(!lazy.is_loaded());
        assert_eq!(loads.get(), 0);

        let val = lazy.get();
        assert_eq!(*val, "computed");
        assert!(lazy.is_loaded());
        assert_eq!(loads.get(), 1);

        // Second access shouldn't recompute
        let val2 = lazy.get();
        assert_eq!(*val2, "computed");
        assert_eq!(loads.get(), 1);
    }

    /// A fresh manager has arena capacity reserved but nothing allocated.
    #[test]
    fn test_memory_manager() {
        let manager = BuildMemoryManager::new();
        let stats = manager.memory_usage();

        // Should start with some capacity but no allocation
        assert_eq!(stats.arena_allocated, 0);
        assert!(stats.arena_capacity > 0);
    }
}