use indexmap::IndexMap;
use std::cell::RefCell;
use std::collections::VecDeque;
use std::mem;
/// Chunked byte-accounting arena.
///
/// The arena owns a list of `Vec<u8>` chunks, each created with `chunk_size`
/// bytes of capacity. Every [`Arena::alloc`] call appends `size_of::<T>()`
/// zeroed bytes to the active chunk; those bytes are what
/// [`Arena::allocated_bytes`] and [`Arena::capacity_bytes`] report.
///
/// NOTE(review): allocated values are returned in ordinary `Box`es and are
/// never written into the chunks, so the chunks only mirror the sizes of the
/// values handed out. Resetting or clearing the arena does not affect boxes
/// that were returned earlier.
pub struct Arena {
    /// Backing chunks, each created with `chunk_size` bytes of capacity.
    chunks: RefCell<Vec<Vec<u8>>>,
    /// Index into `chunks` of the chunk that receives the next allocation.
    current_chunk: RefCell<usize>,
    /// Bytes recorded in the current chunk since it became the current one.
    current_offset: RefCell<usize>,
    /// Target size in bytes of every chunk.
    chunk_size: usize,
}

impl Arena {
    /// Creates an arena holding one empty chunk with `chunk_size` bytes of capacity.
    pub fn new(chunk_size: usize) -> Self {
        Self {
            chunks: RefCell::new(vec![Vec::with_capacity(chunk_size)]),
            current_chunk: RefCell::new(0),
            current_offset: RefCell::new(0),
            chunk_size,
        }
    }

    /// Records `size_of::<T>()` bytes in the arena and returns `value` boxed.
    ///
    /// When the current chunk cannot take the allocation, the arena moves to
    /// the next chunk, reusing one retained by [`Arena::reset`] if available
    /// and pushing a fresh chunk otherwise. An allocation larger than
    /// `chunk_size` still gets recorded; its chunk simply grows past
    /// `chunk_size`.
    pub fn alloc<T>(&self, value: T) -> Box<T> {
        let size = std::mem::size_of::<T>();
        {
            let mut chunks = self.chunks.borrow_mut();
            let mut current = self.current_chunk.borrow_mut();
            let mut offset = self.current_offset.borrow_mut();

            // Check the chunk that will actually be written to. Looking at the
            // last chunk instead is wrong after `reset()`, where the current
            // chunk is 0 but later (empty) chunks are still present.
            let has_room = chunks
                .get(*current)
                .map_or(false, |chunk| chunk.len() + size <= self.chunk_size);

            if !has_room {
                let next = if chunks.is_empty() { 0 } else { *current + 1 };
                if next >= chunks.len() {
                    chunks.push(Vec::with_capacity(self.chunk_size));
                    *current = chunks.len() - 1;
                } else {
                    // Reuse a chunk kept alive by `reset()`; it is empty
                    // because chunks are only ever filled in order.
                    *current = next;
                }
                *offset = 0;
            }

            if let Some(chunk) = chunks.get_mut(*current) {
                let new_len = chunk.len() + size;
                chunk.resize(new_len, 0);
                *offset += size;
            }
        }
        Box::new(value)
    }

    /// Returns the total number of bytes recorded across all chunks.
    pub fn allocated_bytes(&self) -> usize {
        self.chunks.borrow().iter().map(|chunk| chunk.len()).sum()
    }

    /// Returns the total capacity in bytes of all chunks.
    pub fn capacity_bytes(&self) -> usize {
        self.chunks
            .borrow()
            .iter()
            .map(|chunk| chunk.capacity())
            .sum()
    }

    /// Empties every chunk but keeps the chunks (and their capacity) for reuse.
    pub fn reset(&self) {
        let mut chunks = self.chunks.borrow_mut();
        for chunk in chunks.iter_mut() {
            chunk.clear();
        }
        *self.current_chunk.borrow_mut() = 0;
        *self.current_offset.borrow_mut() = 0;
    }

    /// Drops every chunk; the next `alloc` call creates a new first chunk.
    pub fn clear(&self) {
        self.chunks.borrow_mut().clear();
        *self.current_chunk.borrow_mut() = 0;
        *self.current_offset.borrow_mut() = 0;
    }
}
/// Single-threaded pool that recycles values of type `T`.
///
/// Values are handed out wrapped in a [`PooledObject`] guard and pushed back
/// into the pool when the guard is dropped, as long as the pool holds fewer
/// than `max_size` idle values. Returned values are stored as-is: the pool
/// does not reset or clear them before handing them out again.
pub struct ObjectPool<T> {
    /// Idle values, handed out from the front and returned to the back.
    objects: RefCell<VecDeque<T>>,
    /// Builds a new value when no idle one is available.
    factory: Box<dyn Fn() -> T>,
    /// Maximum number of idle values kept; extra returned values are dropped.
    max_size: usize,
}

impl<T> ObjectPool<T> {
    /// Creates an empty pool that builds values with `factory` and keeps at
    /// most `max_size` idle values.
    pub fn new<F>(factory: F, max_size: usize) -> Self
    where
        F: Fn() -> T + 'static,
    {
        Self {
            objects: RefCell::new(VecDeque::new()),
            factory: Box::new(factory),
            max_size,
        }
    }

    /// Takes an idle value from the pool, or builds one with the factory when
    /// the pool is empty, and returns it wrapped in a guard.
    pub fn get(&self) -> PooledObject<'_, T> {
        // Pop in a separate statement so the `RefCell` borrow is released
        // before the factory runs; a factory that touches this pool would
        // otherwise panic on a double borrow.
        let recycled = self.objects.borrow_mut().pop_front();
        let obj = recycled.unwrap_or_else(|| (self.factory)());
        PooledObject {
            object: Some(obj),
            pool: self,
        }
    }

    /// Puts `obj` back into the pool, or drops it when the pool is full.
    fn return_object(&self, obj: T) {
        let mut objects = self.objects.borrow_mut();
        if objects.len() < self.max_size {
            objects.push_back(obj);
        }
    }

    /// Returns the number of idle values currently held by the pool.
    pub fn size(&self) -> usize {
        self.objects.borrow().len()
    }

    /// Drops every idle value held by the pool.
    pub fn clear(&self) {
        self.objects.borrow_mut().clear();
    }
}

/// Guard around a value borrowed from an [`ObjectPool`]; dropping the guard
/// hands the value back to the pool.
pub struct PooledObject<'a, T> {
    /// The borrowed value; `Some` until the guard is dropped.
    object: Option<T>,
    /// Pool the value is returned to.
    pool: &'a ObjectPool<T>,
}

impl<'a, T> PooledObject<'a, T> {
    /// Returns a mutable reference to the pooled value.
    pub fn get_mut(&mut self) -> &mut T {
        self.object
            .as_mut()
            .expect("pooled object is present until the guard is dropped")
    }

    /// Returns a shared reference to the pooled value.
    pub fn get(&self) -> &T {
        self.object
            .as_ref()
            .expect("pooled object is present until the guard is dropped")
    }
}

impl<'a, T> Drop for PooledObject<'a, T> {
    /// Returns the wrapped value to the pool it came from.
    fn drop(&mut self) {
        if let Some(obj) = self.object.take() {
            self.pool.return_object(obj);
        }
    }
}
/// Element record that refers to its strings by `u32` index instead of
/// owning them.
///
/// In this file the indices are produced by `CompactAST::intern_string`
/// inside `CompactAST::convert_element`.
#[derive(Debug, Clone)]
// NOTE(review): the fields are written in `CompactAST::convert_element` but not
// read anywhere in this file, which is presumably why the lint is silenced.
#[allow(dead_code)]
pub struct CompactElement {
/// Interned index of the element name.
name_idx: u32,
/// Interned index of the element namespace, if the element has one.
namespace_idx: Option<u32>,
/// Attribute entries as pairs of interned key/value indices.
attributes: CompactAttributes,
/// References to child nodes; `CompactAST::convert_element` always leaves
/// this empty.
children: Vec<CompactNodeRef>,
}
/// Kind of a node, each variant carrying a `u32` payload.
///
/// NOTE(review): this enum is not used elsewhere in this file; the payload is
/// presumably an index into a `CompactAST` table — confirm against callers.
#[derive(Debug, Clone)]
pub enum NodeType {
/// Element node.
Element(u32),
/// Text node.
Text(u32),
/// Comment node.
Comment(u32),
}
/// Reference to a child node, stored in `CompactElement::children`.
///
/// NOTE(review): nothing in this file constructs these variants; the `u32`
/// payload is presumably an index (element table for `Element`, string table
/// for `Text`/`Comment`) — confirm against the code that builds children.
#[derive(Debug, Clone)]
pub enum CompactNodeRef {
/// Reference to an element node.
Element(u32),
/// Reference to a text node.
Text(u32),
/// Reference to a comment node.
Comment(u32),
}
/// Ordered list of attribute entries, each a `(key index, value index)` pair.
///
/// Within this file the indices come from `CompactAST::intern_string`.
/// Entries are appended as given; an existing key is never replaced.
#[derive(Debug, Clone, Default)]
pub struct CompactAttributes {
    /// Entries in insertion order.
    data: Vec<(u32, u32)>,
}

impl CompactAttributes {
    /// Appends the entry `(key_idx, value_idx)` to the end of the list.
    pub fn insert(&mut self, key_idx: u32, value_idx: u32) {
        let entry = (key_idx, value_idx);
        self.data.push(entry);
    }

    /// Returns how many entries are stored.
    pub fn len(&self) -> usize {
        let Self { data } = self;
        data.len()
    }

    /// Returns `true` when no entry is stored.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Iterates over the entries by value, in insertion order.
    pub fn iter(&self) -> impl Iterator<Item = (u32, u32)> + '_ {
        let entries = self.data.as_slice();
        entries.iter().map(|&(key_idx, value_idx)| (key_idx, value_idx))
    }
}
/// AST representation that stores every distinct string once and refers to
/// strings and elements by `u32` index.
#[derive(Debug)]
pub struct CompactAST {
    /// Interned strings; a string's position is its index.
    strings: Vec<String>,
    /// Lookup from string content to its index in `strings`.
    string_map: IndexMap<String, u32>,
    /// Converted elements; an element's position is its index.
    elements: Vec<CompactElement>,
    /// Index in `elements` of the root element (0 until `from_ast` sets it).
    root_idx: u32,
    /// Namespace declarations as `(prefix index, uri index)` pairs.
    namespaces: Vec<(u32, u32)>,
    /// Interned index of the schema location, when the source AST has one.
    schema_location_idx: Option<u32>,
}

impl Default for CompactAST {
    /// Equivalent to [`CompactAST::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl CompactAST {
    /// Creates an AST with empty tables, no schema location and `root_idx` 0.
    pub fn new() -> Self {
        Self {
            strings: Vec::new(),
            string_map: IndexMap::new(),
            elements: Vec::new(),
            root_idx: 0,
            namespaces: Vec::new(),
            schema_location_idx: None,
        }
    }

    /// Converts the current length of a table into the index of its next entry.
    ///
    /// Panics instead of silently truncating when the index does not fit in `u32`.
    fn next_index(len: usize, table: &str) -> u32 {
        <u32 as std::convert::TryFrom<usize>>::try_from(len)
            .unwrap_or_else(|_| panic!("{} table exceeds u32::MAX entries", table))
    }

    /// Returns the index of `s` in the string table, adding it first when it
    /// has not been interned yet.
    ///
    /// # Panics
    ///
    /// Panics if the new index would not fit in a `u32`.
    pub fn intern_string(&mut self, s: &str) -> u32 {
        if let Some(&idx) = self.string_map.get(s) {
            return idx;
        }
        let idx = Self::next_index(self.strings.len(), "string");
        // Both the table and the lookup map own a copy of the string.
        let owned = s.to_owned();
        self.strings.push(owned.clone());
        self.string_map.insert(owned, idx);
        idx
    }

    /// Returns the interned string stored at `idx`, or `None` when `idx` is
    /// out of range.
    pub fn get_string(&self, idx: u32) -> Option<&str> {
        self.strings.get(idx as usize).map(|s| s.as_str())
    }

    /// Appends `element` to the element table and returns its index.
    ///
    /// # Panics
    ///
    /// Panics if the new index would not fit in a `u32`.
    pub fn add_element(&mut self, element: CompactElement) -> u32 {
        let idx = Self::next_index(self.elements.len(), "element");
        self.elements.push(element);
        idx
    }

    /// Returns the element stored at `idx`, or `None` when `idx` is out of range.
    pub fn get_element(&self, idx: u32) -> Option<&CompactElement> {
        self.elements.get(idx as usize)
    }

    /// Returns a rough size estimate in bytes.
    ///
    /// The estimate adds up the byte length of every interned string, one
    /// `String` plus one `u32` per lookup-map entry, and one
    /// `CompactElement` per element. Heap data owned by the map keys and by
    /// element attributes/children is not included.
    pub fn memory_footprint(&self) -> usize {
        let strings_size = self.strings.iter().map(|s| s.len()).sum::<usize>();
        let map_size = self.string_map.len() * (mem::size_of::<String>() + mem::size_of::<u32>());
        let elements_size = self.elements.len() * mem::size_of::<CompactElement>();
        strings_size + map_size + elements_size
    }

    /// Builds a compact AST from `ast`: interns its namespace pairs and its
    /// schema location (when present), then converts the root element.
    pub fn from_ast(ast: &crate::ast::AST) -> Self {
        let mut compact = CompactAST::new();
        for (prefix, uri) in &ast.namespaces {
            let prefix_idx = compact.intern_string(prefix);
            let uri_idx = compact.intern_string(uri);
            compact.namespaces.push((prefix_idx, uri_idx));
        }
        if let Some(ref location) = ast.schema_location {
            compact.schema_location_idx = Some(compact.intern_string(location));
        }
        compact.root_idx = compact.convert_element(&ast.root);
        compact
    }

    /// Interns the name, namespace and attributes of `element`, stores the
    /// resulting `CompactElement` and returns its index.
    fn convert_element(&mut self, element: &crate::ast::Element) -> u32 {
        let name_idx = self.intern_string(&element.name);
        let namespace_idx = element.namespace.as_ref().map(|ns| self.intern_string(ns));
        let mut attributes = CompactAttributes::default();
        for (key, value) in &element.attributes {
            let key_idx = self.intern_string(key);
            let value_idx = self.intern_string(value);
            attributes.insert(key_idx, value_idx);
        }
        // NOTE(review): nothing from `element` is converted into children, so
        // the stored element always has an empty child list — confirm whether
        // child conversion is intentionally left out.
        let children = Vec::new();
        let compact_element = CompactElement {
            name_idx,
            namespace_idx,
            attributes,
            children,
        };
        self.add_element(compact_element)
    }
}
/// Value computed by a loader closure the first time it is requested and
/// cached afterwards.
pub struct LazyField<T> {
    /// Cached value; `None` until the first `get` and again after `clear`.
    value: RefCell<Option<T>>,
    /// Closure that produces the value on demand.
    loader: Box<dyn Fn() -> T>,
}

impl<T: std::fmt::Debug> std::fmt::Debug for LazyField<T> {
    /// Prints the cached value; the loader is shown as a fixed placeholder.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("LazyField");
        out.field("value", &self.value);
        out.field("loader", &"<function>");
        out.finish()
    }
}

impl<T> LazyField<T> {
    /// Creates a field with no cached value; `loader` is not called here.
    pub fn new<F>(loader: F) -> Self
    where
        F: Fn() -> T + 'static,
    {
        let value = RefCell::new(None);
        let loader: Box<dyn Fn() -> T> = Box::new(loader);
        Self { value, loader }
    }

    /// Returns a borrow of the value, running the loader first when nothing
    /// is cached yet.
    pub fn get(&self) -> std::cell::Ref<'_, T> {
        let missing = self.value.borrow().is_none();
        if missing {
            // Run the loader before taking the mutable borrow.
            let fresh = (self.loader)();
            *self.value.borrow_mut() = Some(fresh);
        }
        std::cell::Ref::map(self.value.borrow(), |slot| slot.as_ref().unwrap())
    }

    /// Returns `true` when a value is currently cached.
    pub fn is_loaded(&self) -> bool {
        match *self.value.borrow() {
            Some(_) => true,
            None => false,
        }
    }

    /// Discards the cached value so the next `get` runs the loader again.
    pub fn clear(&self) {
        let mut slot = self.value.borrow_mut();
        *slot = None;
    }
}
/// Groups the arena and the object pools used while building.
pub struct BuildMemoryManager {
    /// Arena created with 64 KiB chunks.
    pub arena: Arena,
    /// Pool of AST elements (keeps at most 100 idle elements).
    pub element_pool: ObjectPool<crate::ast::Element>,
    /// Pool of strings created with 64 bytes of capacity (at most 50 idle).
    pub small_string_pool: ObjectPool<String>,
    /// Pool of byte buffers created with 8192 bytes of capacity (at most 10 idle).
    pub buffer_pool: ObjectPool<Vec<u8>>,
}

impl BuildMemoryManager {
    /// Creates the arena and the three pools with their fixed sizes.
    pub fn new() -> Self {
        let arena = Arena::new(64 * 1024);
        let element_pool: ObjectPool<crate::ast::Element> =
            ObjectPool::new(|| crate::ast::Element::new(""), 100);
        let small_string_pool: ObjectPool<String> =
            ObjectPool::new(|| String::with_capacity(64), 50);
        let buffer_pool: ObjectPool<Vec<u8>> = ObjectPool::new(|| Vec::with_capacity(8192), 10);
        Self {
            arena,
            element_pool,
            small_string_pool,
            buffer_pool,
        }
    }

    /// Takes a snapshot of the arena byte counters and the idle count of each pool.
    pub fn memory_usage(&self) -> MemoryStats {
        let arena_allocated = self.arena.allocated_bytes();
        let arena_capacity = self.arena.capacity_bytes();
        let element_pool_size = self.element_pool.size();
        let string_pool_size = self.small_string_pool.size();
        let buffer_pool_size = self.buffer_pool.size();
        MemoryStats {
            arena_allocated,
            arena_capacity,
            element_pool_size,
            string_pool_size,
            buffer_pool_size,
        }
    }

    /// Resets the arena for reuse; the pools are left untouched.
    pub fn reset_for_next_build(&self) {
        let arena = &self.arena;
        arena.reset();
    }

    /// Clears the arena and empties every pool.
    pub fn full_reset(&self) {
        let Self {
            arena,
            element_pool,
            small_string_pool,
            buffer_pool,
        } = self;
        arena.clear();
        element_pool.clear();
        small_string_pool.clear();
        buffer_pool.clear();
    }
}

impl Default for BuildMemoryManager {
    /// Same as [`BuildMemoryManager::new`].
    fn default() -> Self {
        BuildMemoryManager::new()
    }
}
/// Snapshot of arena byte counters and pool sizes, as filled in by
/// `BuildMemoryManager::memory_usage`.
#[derive(Debug, Default)]
pub struct MemoryStats {
    /// Bytes recorded in the arena.
    pub arena_allocated: usize,
    /// Total capacity in bytes of the arena chunks.
    pub arena_capacity: usize,
    /// Number of idle objects in the element pool.
    pub element_pool_size: usize,
    /// Number of idle objects in the string pool.
    pub string_pool_size: usize,
    /// Number of idle objects in the buffer pool.
    pub buffer_pool_size: usize,
}

impl MemoryStats {
    /// Estimates the total footprint in bytes: arena capacity plus a fixed
    /// per-object cost for every idle pooled object.
    ///
    /// The per-object costs for strings (64) and buffers (8192) equal the
    /// capacities requested by the pool factories in
    /// `BuildMemoryManager::new` in this file.
    pub fn total_bytes(&self) -> usize {
        let element_bytes = self.element_pool_size * mem::size_of::<crate::ast::Element>();
        let string_bytes = self.string_pool_size * 64;
        let buffer_bytes = self.buffer_pool_size * 8192;
        self.arena_capacity + element_bytes + string_bytes + buffer_bytes
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::Cell;
    use std::rc::Rc;

    /// Allocations return the stored value and are reflected in the byte count.
    #[test]
    fn test_arena_allocation() {
        let arena = Arena::new(1024);
        let val1 = arena.alloc(42u32);
        let val2 = arena.alloc("hello world".to_string());
        assert_eq!(*val1, 42);
        assert_eq!(*val2, "hello world");
        assert!(arena.allocated_bytes() > 0);
    }

    /// Objects return to the pool only when their guards are dropped.
    #[test]
    fn test_object_pool() {
        let pool = ObjectPool::new(|| String::with_capacity(32), 5);
        {
            let mut obj1 = pool.get();
            obj1.get_mut().push_str("test");
            assert_eq!(obj1.get(), "test");
            {
                let _obj2 = pool.get();
                // Both objects are checked out, so nothing is idle yet.
                assert_eq!(pool.size(), 0);
            }
        }
        assert_eq!(pool.size(), 2);
    }

    /// Interning the same string twice yields the same index.
    #[test]
    fn test_compact_ast() {
        let mut compact = CompactAST::new();
        let hello_idx = compact.intern_string("hello");
        let hello_idx2 = compact.intern_string("hello");
        let world_idx = compact.intern_string("world");
        assert_eq!(hello_idx, hello_idx2);
        assert_ne!(hello_idx, world_idx);
        assert_eq!(compact.get_string(hello_idx), Some("hello"));
        assert_eq!(compact.get_string(world_idx), Some("world"));
    }

    /// The loader runs on first access only, and the result is cached.
    #[test]
    fn test_lazy_field() {
        // Shared counter so the test can observe how often the loader ran.
        let calls = Rc::new(Cell::new(0u32));
        let calls_in_loader = Rc::clone(&calls);
        let lazy = LazyField::new(move || {
            calls_in_loader.set(calls_in_loader.get() + 1);
            "computed".to_string()
        });
        assert!(!lazy.is_loaded());
        assert_eq!(calls.get(), 0);
        let val = lazy.get();
        assert_eq!(*val, "computed");
        let val2 = lazy.get();
        assert_eq!(*val2, "computed");
        assert!(lazy.is_loaded());
        assert_eq!(calls.get(), 1);
    }

    /// A fresh manager has capacity reserved but nothing allocated.
    #[test]
    fn test_memory_manager() {
        let manager = BuildMemoryManager::new();
        let stats = manager.memory_usage();
        assert_eq!(stats.arena_allocated, 0);
        assert!(stats.arena_capacity > 0);
    }
}