expry 0.1.2

Execute an expression on an encoded (binary) value, yielding another binary value (either in decoded or encoded form). Supports custom functions. Supports parsing the expression and converting the expression to bytecode.
Documentation
use crate::raw_utils::EncodingError;

// See also https://manishearth.github.io/blog/2021/03/15/arenas-in-rust/
// See also https://doc.rust-lang.org/std/ptr/fn.write.html
// See also https://github.com/rust-lang/rust/issues/27779 (about absence of placement new)
// mem::align_of::<T>() ???
// https://doc.rust-lang.org/nomicon/vec/vec-alloc.html

/// Converts one hexadecimal digit character into its numeric value (0..=15).
///
/// Accepts `0`-`9`, `a`-`f` and `A`-`F`.
///
/// # Errors
/// Returns an `EncodingError` carrying the source line of this check when `c`
/// is not a hexadecimal digit.
pub fn from_hex(c: char) -> Result<u8,EncodingError> {
    match c {
        '0'..='9' => Ok(c as u8 - b'0'),
        'a'..='f' => Ok(c as u8 - b'a' + 10),
        'A'..='F' => Ok(c as u8 - b'A' + 10),
        _ => Err(EncodingError{line_nr: line!() }),
    }
}
/// Converts one ASCII hexadecimal digit byte into its numeric value (0..=15).
///
/// Accepts `0`-`9`, `a`-`f` and `A`-`F`.
///
/// # Errors
/// Returns an `EncodingError` carrying the source line of this check when `c`
/// is not a hexadecimal digit.
pub fn from_hex_u8(c: u8) -> Result<u8,EncodingError> {
    let value = match c {
        b'0'..=b'9' => c - b'0',
        b'a'..=b'f' => c - b'a' + 10,
        b'A'..=b'F' => c - b'A' + 10,
        _ => return Err(EncodingError{line_nr: line!() }),
    };
    Ok(value)
}
/// Looks up the HTML entity for a byte that appears outside of an attribute value.
///
/// `&`, `<` and `>` are mapped to `&amp;`, `&lt;` and `&gt;`; every other byte
/// yields `Ok(None)`, meaning "keep the byte as is". This function never
/// returns an error; the `Result` matches the replacement callback shape used
/// by `copy_with_replacement`.
///
/// NOTE(review): the previous comment described this as escaping for inside
/// the `script` HTML tag — confirm the intended context.
pub fn html_escape_outside_attribute_u8(c: u8) -> Result<Option<&'static [u8]>,EncodingError> {
    let entity: &'static [u8] = match c {
        b'&' => b"&amp;",
        b'<' => b"&lt;",
        b'>' => b"&gt;",
        _ => return Ok(None),
    };
    Ok(Some(entity))
}
/// Looks up the HTML entity for a byte that appears inside an attribute value.
///
/// `&`, `<`, `>`, `"` and `'` are mapped to `&amp;`, `&lt;`, `&gt;`, `&quot;`
/// and `&#39;`; every other byte yields `Ok(None)`, meaning "keep the byte as
/// is". This function never returns an error.
pub fn html_escape_inside_attribute_u8(c: u8) -> Result<Option<&'static [u8]>,EncodingError> {
    let entity: &'static [u8] = match c {
        b'&' => b"&amp;",
        b'<' => b"&lt;",
        b'>' => b"&gt;",
        b'"' => b"&quot;",
        b'\'' => b"&#39;",
        _ => return Ok(None),
    };
    Ok(Some(entity))
}
/// Computes the URL escape for a single byte.
///
/// * ASCII letters, digits and the bytes `- ~ . / , = & : ? _` are left alone
///   (`Ok(None)`).
/// * A space becomes `+`.
/// * Every other byte becomes `%XX` with upper-case hexadecimal digits. The
///   three bytes are appended to `buffer` and the returned slice covers only
///   those three bytes, so the result is correct even when the caller did not
///   clear `buffer` beforehand.
///
/// This function never returns an error; the `Result` matches [`ReplaceFn`].
pub fn url_escape_u8(c: u8, buffer: &mut Vec<u8>) -> Result<Option<&[u8]>,EncodingError> {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";
    match c {
        b' ' => return Ok(Some(b"+")),
        b'-' | b'~' | b'.' | b'/' | b',' | b'=' | b'&' | b':' | b'?' | b'_' => return Ok(None),
        _ if c.is_ascii_alphanumeric() => return Ok(None),
        _ => {}
    }
    // Remember where this escape starts so that stale scratch content is never returned.
    let start = buffer.len();
    buffer.extend_from_slice(&[
        b'%',
        HEX_UPPER[usize::from(c >> 4)],
        HEX_UPPER[usize::from(c & 0xF)],
    ]);
    Ok(Some(&buffer[start..]))
}


/// Callback used by `copy_with_dynamic_replacement` to decide how a byte is written.
///
/// It receives the byte and a scratch buffer. Returning `Ok(None)` keeps the
/// byte unchanged; returning `Ok(Some(bytes))` replaces it with `bytes`, which
/// may borrow from the scratch buffer.
pub type ReplaceFn = fn (u8, &mut Vec<u8>) -> Result<Option<&[u8]>,EncodingError>;

/// One contiguous heap buffer from which allocations are carved front to back.
struct MemoryPage {
    /// Number of bytes at the end of the buffer that have not been handed out yet;
    /// the next allocation starts at offset `size - available`.
    available: usize,
    /// Total size of the buffer in bytes.
    size: usize,
    /// Start of the buffer.
    ptr: *mut u8,
}

/// Returns how many bytes must be added to `ptr` to make its address a multiple of `align`.
///
/// The computation masks with `align - 1`, so `align` is expected to be a
/// non-zero power of two (as returned by `core::mem::align_of`).
fn align_offset<T>(ptr: * mut T, align: usize) -> usize {
    let misalignment = (ptr as usize) & (align - 1);
    match misalignment {
        0 => 0,
        extra => align - extra,
    }
}

impl MemoryPage {
    /// Allocates a page of `bytes` bytes (alignment 1) from the global allocator.
    ///
    /// # Panics
    /// Panics when `bytes` is zero or too large for a `Layout`. Aborts through
    /// `std::alloc::handle_alloc_error` when the allocator reports failure.
    pub fn new(bytes: usize) -> Self {
        // A zero-sized layout must never reach `std::alloc::alloc`.
        assert!(bytes > 0, "MemoryPage::new: a page must hold at least one byte");
        let layout = std::alloc::Layout::from_size_align(bytes, 1)
            .expect("MemoryPage::new: page size is too large");
        // SAFETY: `layout` has a non-zero size (checked above).
        let ptr = unsafe { std::alloc::alloc(layout) };
        if ptr.is_null() {
            // The allocator signals failure with a null pointer; never hand that out.
            std::alloc::handle_alloc_error(layout);
        }
        Self {
            available: bytes,
            size: bytes,
            ptr,
        }
    }

    /// Carves `count` values of type `T` out of this page and returns a pointer
    /// to the first one, aligned for `T`. The memory is not initialised.
    ///
    /// # Panics
    /// Panics when the request (including alignment padding) does not fit in
    /// the bytes that are still available.
    fn alloc<T>(&mut self, count: usize) -> *mut T {
        let length = count
            .checked_mul(core::mem::size_of::<T>())
            .expect("MemoryPage::alloc: allocation size overflows usize");
        // SAFETY: `available <= size`, so the cursor stays within the page or one past its end.
        let cursor = unsafe { self.ptr.add(self.size - self.available) };
        let offset = align_offset(cursor, core::mem::align_of::<T>());
        let needed = length
            .checked_add(offset)
            .expect("MemoryPage::alloc: allocation size overflows usize");
        // The padding counts against the page as well; checked in release builds too,
        // because an underflow of `available` would hand out memory past the page.
        assert!(needed <= self.available, "MemoryPage::alloc: request does not fit in page");
        self.available -= needed;
        // SAFETY: `offset + length <= available`, so the result lies inside the page.
        unsafe { cursor.add(offset) as *mut T }
    }
}

impl Drop for MemoryPage {
    /// Hands the page's buffer back to the global allocator.
    fn drop(&mut self) {
        // SAFETY: relies on `ptr` having been allocated with exactly this layout
        // (`size` bytes, alignment 1), which is what `MemoryPage::new` uses.
        let layout = unsafe { std::alloc::Layout::from_size_align_unchecked(self.size, 1) };
        unsafe { std::alloc::dealloc(self.ptr, layout) };
    }
}

/// Owner of the memory pages that [`MemoryScope`]s allocate from.
///
/// Allocation itself happens through a scope obtained with `rewind()` or
/// `clear()`; the pool only keeps the pages alive.
pub struct MemoryPool {
    /// Pages currently in use; allocations are served from the last one.
    content: Vec<MemoryPage>,
    /// Spare pages kept for reuse by later allocations.
    /// NOTE(review): a dropping rewind-scope pushes every page it drains here,
    /// including pages larger than the default size — confirm the comment below still holds.
    next: Vec<MemoryPage>, // all the same DEFAULT_MEMORY_PAGE_SIZE
    /// Functions that a scope runs (newest first) when it is dropped.
    delay_execution: Vec<fn()>, // FIXME: Vec should be of type Vec<std::ops::Drop> but without the heap allocation of a Box
}

impl MemoryPool {
    /// Creates an empty pool. No page is allocated until a scope needs one.
    pub fn new() -> Self {
        const INITIAL_PAGE_SLOTS: usize = 32;
        Self {
            content: Vec::with_capacity(INITIAL_PAGE_SLOTS),
            next: Vec::with_capacity(INITIAL_PAGE_SLOTS),
            delay_execution: Vec::new(),
        }
    }

    /// Opens a scope that, when dropped, restores the pool to its current
    /// state and keeps the pages added in the meantime for reuse.
    #[must_use]
    pub fn rewind(&mut self) -> MemoryScope<'_> {
        self.scope_with_mode(false)
    }

    /// Opens a scope that, when dropped, restores the pool to its current
    /// state and frees the pages added in the meantime.
    #[must_use]
    pub fn clear(&mut self) -> MemoryScope<'_> {
        self.scope_with_mode(true)
    }

    /// Records the pool's current fill level in a new scope; `clear` selects
    /// what the scope does with newly added pages when it is dropped.
    fn scope_with_mode(&mut self, clear: bool) -> MemoryScope<'_> {
        let len_content = self.content.len();
        let len_last_available = self.content.last().map_or(0, |page| page.available);
        let len_delay_execution = self.delay_execution.len();
        MemoryScope {
            pool: self,
            len_content,
            len_last_available,
            len_delay_execution,
            clear,
        }
    }
}

impl Default for MemoryPool {
    fn default() -> Self {
        Self::new()
    }
}

impl Drop for MemoryPool {
    /// Intentionally does nothing itself: the pages held in the pool's vectors
    /// are released by their own `Drop` implementations.
    fn drop(&mut self) {
        // Debugging aid, disabled:
        //println!("drop memorypool: {}", self.content.len());
    }
}

/// Allocation handle on a [`MemoryPool`] that restores the pool's fill level when dropped.
///
/// The `len_*` fields are a snapshot of the pool taken when the scope was created.
pub struct MemoryScope<'c> {
    /// The pool this scope allocates from.
    pool: &'c mut MemoryPool,
    /// Number of in-use pages at creation time.
    len_content: usize,
    /// `available` of the last in-use page at creation time (0 when there was no page).
    len_last_available: usize,
    /// Number of delayed functions registered at creation time.
    len_delay_execution: usize,
    /// On drop: `true` frees pages added since creation, `false` keeps them for reuse.
    clear: bool,
}


impl<'c> MemoryScope<'c> {
    /// Opens a nested scope with its own (shorter) lifetime.
    ///
    /// When the nested scope is dropped the pool returns to its current fill
    /// level and pages added in the meantime are kept for reuse.
    #[must_use]
    pub fn rewind(&mut self) -> MemoryScope<'_> {
        let pages = &self.pool.content;
        let len_content = pages.len();
        let len_last_available = pages.last().map_or(0, |page| page.available);
        let len_delay_execution = self.pool.delay_execution.len();
        MemoryScope {
            pool: self.pool,
            len_content,
            len_last_available,
            len_delay_execution,
            clear: false,
        }
    }
    /// Opens a nested scope with its own (shorter) lifetime.
    ///
    /// When the nested scope is dropped the pool returns to its current fill
    /// level and pages added in the meantime are freed.
    #[must_use]
    pub fn clear(&mut self) -> MemoryScope<'_> {
        let pages = &self.pool.content;
        let len_content = pages.len();
        let len_last_available = pages.last().map_or(0, |page| page.available);
        let len_delay_execution = self.pool.delay_execution.len();
        MemoryScope {
            pool: self.pool,
            len_content,
            len_last_available,
            len_delay_execution,
            clear: true,
        }
    }

    /// Returns how many values of type `T` still fit in the current (last)
    /// page after alignment padding, or 0 when there is no page.
    ///
    /// # Panics
    /// Panics (division by zero) when `T` is a zero-sized type.
    fn available<T>(&self) -> usize {
        match self.pool.content.last() {
            Some(last) => {
                // SAFETY: `available <= size`, so the cursor stays within the page or one past its end.
                let cursor = unsafe { last.ptr.add(last.size - last.available) };
                let offset = align_offset(cursor, core::mem::align_of::<T>());
                // Fewer bytes left than the padding needs means nothing fits;
                // a plain subtraction would underflow here.
                last.available.saturating_sub(offset) / core::mem::size_of::<T>()
            }
            None => 0,
        }
    }

    /// Makes a new page the current one.
    ///
    /// `length` is rounded up to a power of two. Requests up to the default
    /// page size reuse a spare page when one is available and otherwise
    /// allocate a default-sized page; larger requests get a page of exactly
    /// the rounded size.
    fn add(&mut self, length: usize) {
        const DEFAULT_MEMORY_PAGE_SIZE: usize = 16384;
        let rounded = ceil_to_power_of_two(length);
        let page = if rounded > DEFAULT_MEMORY_PAGE_SIZE {
            MemoryPage::new(rounded)
        } else {
            match self.pool.next.pop() {
                Some(recycled) => recycled,
                None => MemoryPage::new(DEFAULT_MEMORY_PAGE_SIZE),
            }
        };
        self.pool.content.push(page);
    }

    /// Reserves room for `count` values of `T` and returns a pointer to the
    /// first one; typed front end of `_alloc_ptr`.
    fn alloc_ptr<T>(&mut self, count: usize) -> * mut T {
        let size = core::mem::size_of::<T>();
        let align = core::mem::align_of::<T>();
        let raw = self._alloc_ptr(count, size, align);
        raw as * mut T
    }
    /// Reserves `max(1, count) * size` bytes aligned to `align` and returns a
    /// pointer to them. The memory is not initialised.
    ///
    /// The current page is used when the request (including alignment padding)
    /// fits; otherwise a new page is added. Pages are allocated with alignment
    /// 1, so a fresh page that is exactly as large as the request may still be
    /// too small once padding is added; in that case it is replaced by a page
    /// with room for the padding.
    ///
    /// # Panics
    /// Panics when the requested size overflows `usize`.
    fn _alloc_ptr(&mut self, count: usize, size: usize, align: usize) -> * mut u8 {
        let length = std::cmp::max(1, count)
            .checked_mul(size)
            .expect("MemoryScope: allocation size overflows usize");

        if let Some(page) = self.pool.content.last_mut() {
            // SAFETY: `available <= size`, so the cursor stays within the page or one past its end.
            let ptr = unsafe { page.ptr.add(page.size - page.available) };
            let offset = align_offset(ptr, align);
            if page.available >= offset.saturating_add(length) {
                page.available -= offset + length;
                // SAFETY: `offset + length` bytes fit in the page (checked above).
                return unsafe { ptr.add(offset) };
            }
        }

        let at = self.pool.content.len();
        self.add(length);
        debug_assert!(at < self.pool.content.len());
        let fits = {
            let page = &self.pool.content[at];
            page.available >= align_offset(page.ptr, align).saturating_add(length)
        };
        if !fits {
            // Rare: the fresh page is misaligned and has no slack for the padding.
            // Replace it by a page that reserves the worst-case padding up front.
            self.pool.content.pop();
            let padded = length
                .checked_add(align - 1)
                .expect("MemoryScope: allocation size overflows usize");
            self.add(padded);
        }

        let page = &mut self.pool.content[at];
        let offset = align_offset(page.ptr, align);
        // Checked in release builds too: an underflow of `available` would
        // later hand out memory outside of the page.
        assert!(page.available >= offset + length, "MemoryScope: new page is too small");
        page.available -= offset + length;
        // SAFETY: `offset + length` bytes fit in the page (asserted above).
        unsafe { page.ptr.add(offset) }
    }
    /// Reserves everything that is left in the current page as an array of `T`.
    ///
    /// Returns the aligned start pointer and the number of elements reserved,
    /// or `(null, 0)` when there is no page or not even one element fits.
    /// Elements that end up unused must be handed back with `unused()`.
    fn alloc_ptr_remaining<T>(&mut self) -> (*mut T, usize) {
        let elem_size = core::mem::size_of::<T>();
        match self.pool.content.last_mut() {
            Some(page) => {
                let cursor = unsafe { page.ptr.add(page.size - page.available) };
                let padding = align_offset(cursor, core::mem::align_of::<T>());
                if page.available < padding + elem_size {
                    return (core::ptr::null_mut::<T>(), 0);
                }
                page.available -= padding;
                let count = page.available / elem_size;
                page.available -= count * elem_size;
                let start = unsafe { cursor.add(padding) };
                (start as * mut T, count)
            }
            None => (core::ptr::null_mut::<T>(), 0),
        }
    }
    /// Always adds a new page and reserves all of it as an array of `T`.
    ///
    /// Returns the aligned start pointer and the number of elements reserved,
    /// which is guaranteed to be at least `max(1, count)`. Elements that end
    /// up unused must be handed back with `unused()`.
    ///
    /// Pages are allocated with alignment 1, so a fresh page that is exactly
    /// as large as the request may hold fewer elements than asked for once
    /// padding is added; in that case it is replaced by a page with room for
    /// the padding.
    ///
    /// # Panics
    /// Panics when the requested size overflows `usize`, and (division by
    /// zero) when `T` is a zero-sized type.
    fn add_at_least<T>(&mut self, count: usize) -> (*mut T, usize) {
        let size = core::mem::size_of::<T>();
        let align = core::mem::align_of::<T>();
        let length = std::cmp::max(1, count)
            .checked_mul(size)
            .expect("MemoryScope: allocation size overflows usize");

        let at = self.pool.content.len();
        self.add(length);
        debug_assert!(at < self.pool.content.len());
        let fits = {
            let page = &self.pool.content[at];
            page.available >= align_offset(page.ptr, align).saturating_add(length)
        };
        if !fits {
            // Rare: the fresh page is misaligned and has no slack for the padding.
            self.pool.content.pop();
            let padded = length
                .checked_add(align - 1)
                .expect("MemoryScope: allocation size overflows usize");
            self.add(padded);
        }

        let page = &mut self.pool.content[at];
        let offset = align_offset(page.ptr, align);
        // Callers write up to `count` elements without further checks.
        assert!(page.available >= offset + length, "MemoryScope: new page is too small");
        page.available -= offset;
        let extra = page.available / size;
        page.available -= extra * size;
        // SAFETY: `offset <= available` was asserted above, so the result lies inside the page.
        let start = unsafe { page.ptr.add(offset) };
        (start as * mut T, extra)
    }
    /// Gives `count` elements of `T` back to the current (last) page, undoing
    /// the tail of the most recent reservation. Does nothing without a page.
    fn unused<T>(&mut self, count: usize) {
        match self.pool.content.last_mut() {
            Some(page) => page.available += count * core::mem::size_of::<T>(),
            None => {}
        }
    }

    /// Returns the address at which the next `T` would be placed in the
    /// current page (cursor plus alignment padding), or null without a page.
    /// Nothing is reserved.
    fn current<T>(&mut self) -> *mut T {
        match self.pool.content.last() {
            None => core::ptr::null_mut(),
            Some(page) => {
                let cursor = unsafe { page.ptr.add(page.size - page.available) };
                let padding = align_offset(cursor, core::mem::align_of::<T>());
                let aligned = unsafe { cursor.add(padding) };
                aligned as * mut T
            }
        }
    }

    /// Allocates a slice of `count` values of `T` from the pool.
    ///
    /// The memory is NOT initialised by this function; callers are expected to
    /// write every element before reading it.
    pub fn alloc<'b, T>(&mut self, count: usize) -> &'b mut [T] where 'c: 'b {
        let start = self.alloc_ptr::<T>(count);
        // SAFETY: `alloc_ptr` reserved room for at least `count` values of `T`
        // at an address aligned for `T`.
        unsafe { core::slice::from_raw_parts_mut(start, count) }
    }
    /// Copies `bytes` into the pool and returns the copy.
    pub fn copy_u8<'b>(&mut self, bytes: &'_ [u8]) -> &'b mut [u8] where 'c: 'b {
        let length = bytes.len();
        let dest : *mut u8 = self.alloc_ptr(length);
        // SAFETY: `dest` has room for `length` bytes reserved by `alloc_ptr`.
        unsafe {
            bytes.as_ptr().copy_to(dest, length);
            core::slice::from_raw_parts_mut(dest, length)
        }
    }
    /// Concatenates all `chunks` into one freshly allocated byte slice.
    pub fn concat_u8<'b>(&mut self, chunks: &[&[u8]]) -> &'b mut [u8] where 'c: 'b {
        let total: usize = chunks.iter().map(|chunk| chunk.len()).sum();
        let start : *mut u8 = self.alloc_ptr(total);
        let mut written = 0;
        for chunk in chunks {
            // SAFETY: `written + chunk.len() <= total`, which was reserved above.
            unsafe { chunk.as_ptr().copy_to(start.add(written), chunk.len()) };
            written += chunk.len();
        }
        // SAFETY: all `total` bytes were reserved (and written) above.
        unsafe { core::slice::from_raw_parts_mut(start, total) }
    }
    /// Concatenates all `chunks` into one freshly allocated string.
    pub fn concat_str<'b>(&mut self, chunks: &[&str]) -> &'b mut str where 'c: 'b {
        let total: usize = chunks.iter().map(|chunk| chunk.len()).sum();
        let start : *mut u8 = self.alloc_ptr(total);
        let mut written = 0;
        for chunk in chunks {
            // SAFETY: `written + chunk.len() <= total`, which was reserved above.
            unsafe { chunk.as_ptr().copy_to(start.add(written), chunk.len()) };
            written += chunk.len();
        }
        // SAFETY: the buffer is a concatenation of complete `str` values, hence valid UTF-8.
        unsafe {
            core::str::from_utf8_unchecked_mut(core::slice::from_raw_parts_mut(start, total))
        }
    }
    /// Copies `str` into the pool and returns the copy.
    pub fn copy_str<'b>(&mut self, str: &'_ str) -> &'b mut str where 'c: 'b {
        let copied = self.copy_u8(str.as_bytes());
        // SAFETY: `copied` is a byte-for-byte copy of a valid `str`.
        unsafe { core::str::from_utf8_unchecked_mut(copied) }
    }
    /// Returns the lower-case hexadecimal encoding of the bytes of `str`
    /// (two digits per byte), allocated from the pool.
    pub fn copy_hex_str<'b>(&mut self, str: &'_ str) -> &'b mut str where 'c: 'b {
        const DIGITS: &[u8; 16] = b"0123456789abcdef";
        let encoded = self.alloc::<u8>(str.len()*2);
        for (pair, byte) in encoded.chunks_exact_mut(2).zip(str.bytes()) {
            pair[0] = DIGITS[(byte >> 4) as usize];
            pair[1] = DIGITS[(byte & 0xF) as usize];
        }
        // SAFETY: every byte written is an ASCII hexadecimal digit.
        unsafe { core::str::from_utf8_unchecked_mut(encoded) }
    }
    /// Returns the lower-case hexadecimal encoding of `s` (two digits per
    /// byte), allocated from the pool.
    pub fn copy_hex<'b>(&mut self, s: &'_ [u8]) -> &'b mut [u8] where 'c: 'b {
        const DIGITS: &[u8; 16] = b"0123456789abcdef";
        let encoded = self.alloc::<u8>(s.len()*2);
        for (pair, byte) in encoded.chunks_exact_mut(2).zip(s.iter()) {
            pair[0] = DIGITS[(byte >> 4) as usize];
            pair[1] = DIGITS[(byte & 0xF) as usize];
        }
        encoded
    }

    /// Decodes the hexadecimal text `s` (two digits per byte, either case)
    /// into a byte slice allocated from the pool.
    ///
    /// # Errors
    /// Returns an `EncodingError` when `s` has an odd length or contains a
    /// byte that is not a hexadecimal digit. The output buffer is reserved
    /// before the digits are validated.
    pub fn copy_unhex<'b>(&mut self, s: &'_ [u8]) -> Result<&'b mut [u8],EncodingError> where 'c: 'b {
        if s.len() % 2 != 0 {
            return Err(EncodingError{line_nr: line!() });
        }
        let decoded = self.alloc::<u8>(s.len()/2);
        for (slot, pair) in decoded.iter_mut().zip(s.chunks_exact(2)) {
            let high = from_hex_u8(pair[0])?;
            let low = from_hex_u8(pair[1])?;
            *slot = (high << 4) | low;
        }
        Ok(decoded)
    }

    /// Applies `replace` to every byte of `str`.
    ///
    /// `replace` returns `None` to keep a byte and `Some(bytes)` to substitute
    /// it. When no byte needs substitution the input slice itself is returned
    /// and nothing is allocated; otherwise the result is built in the pool.
    ///
    /// # Errors
    /// Propagates the first error returned by `replace`.
    pub fn copy_with_replacement<'b>(&mut self, str: &'b [u8], replace: fn (u8) -> Result<Option<&'static [u8]>,EncodingError>) -> Result<&'b [u8],EncodingError> where 'c: 'b {
        // Phase 1: find the first byte that needs a replacement.
        let mut first_hit = None;
        for (index, byte) in str.iter().enumerate() {
            if let Some(replacement) = replace(*byte)? {
                first_hit = Some((index, replacement));
                break;
            }
        }
        let (index, replacement) = match first_hit {
            Some(hit) => hit,
            None => return Ok(str),
        };

        // Phase 2: copy the untouched prefix, then translate the remainder.
        let mut escaped = ScopedArrayBuilder::new(self);
        escaped.extend_from_slice(&str[..index]);
        escaped.extend_from_slice(replacement);
        for byte in &str[index+1..] {
            if let Some(substitute) = replace(*byte)? {
                escaped.extend_from_slice(substitute);
            } else {
                escaped.push(*byte);
            }
        }
        Ok(escaped.build())
    }
    /// Applies `replace` to every byte of `str`, giving it a scratch buffer in
    /// which it can build replacements at run time.
    ///
    /// When no byte needs substitution the input slice itself is returned and
    /// nothing is allocated from the pool; otherwise the result is built in
    /// the pool. The scratch buffer is cleared before every call made after
    /// the first substitution.
    ///
    /// # Errors
    /// Propagates the first error returned by `replace`.
    pub fn copy_with_dynamic_replacement<'b>(&mut self, str: &'b [u8], replace: ReplaceFn) -> Result<&'b [u8],EncodingError> where 'c: 'b
    {
        let mut scratch : Vec<u8> = Vec::new();
        for (index, byte) in str.iter().enumerate() {
            let replacement = match replace(*byte, &mut scratch)? {
                Some(replacement) => replacement,
                None => continue,
            };
            // First substitution found: from here on the output is built in the pool.
            let mut escaped = ScopedArrayBuilder::new(self);
            escaped.extend_from_slice(&str[..index]);
            escaped.extend_from_slice(replacement);
            for rest in &str[index+1..] {
                scratch.clear();
                if let Some(substitute) = replace(*rest, &mut scratch)? {
                    escaped.extend_from_slice(substitute);
                } else {
                    escaped.push(*rest);
                }
            }
            return Ok(escaped.build());
        }
        Ok(str)
    }
    /// Formats `args` into a string allocated from the pool.
    ///
    /// Having this method makes `write!(scope, ...)` evaluate to the formatted
    /// `&str`. Arguments that consist of a single literal are returned
    /// directly without allocating.
    ///
    /// # Panics
    /// Panics when a formatting trait implementation reports an error, the
    /// same way `format!` does.
    pub fn write_fmt<'b>(&mut self, args: std::fmt::Arguments<'_>) -> &'b str where 'c: 'b {
        match args.as_str() {
            Some(literal) => literal,
            None => {
                let mut output = ScopedStringBuilder::new(self);
                // unwrap() similar to format!: see https://doc.rust-lang.org/src/alloc/fmt.rs.html#597
                core::fmt::write(&mut output, args).unwrap();
                let formatted: &'b str = output.build();
                formatted
            }
        }
    }

    /// Collects all items produced by `iter` into a slice allocated from the pool.
    pub fn array_from_iter<'b,I: Iterator<Item = T>,T: Copy>(&mut self, iter: I) -> &'b [T] where 'c: 'b {
        let mut collected = ScopedArrayBuilder::new(self);
        iter.for_each(|item| collected.push(item));
        collected.build()
    }
}

/// Rounds `length` up to the nearest power of two; powers of two are returned
/// unchanged and 0 is rounded up to 1.
///
/// # Panics
/// Panics when the result does not fit in a `usize`. The previous hand-rolled
/// bit manipulation overflowed on 0 and on such huge lengths, and in release
/// builds returned 1 for the latter.
fn ceil_to_power_of_two(length: usize) -> usize {
    length
        .checked_next_power_of_two()
        .expect("ceil_to_power_of_two: length exceeds the largest power of two in usize")
}

/// Growable array of `Copy` values that is built directly inside a [`MemoryScope`].
///
/// Call `build()` to obtain the finished slice; the part of the reservation
/// that was not used is handed back to the scope at that point.
pub struct ScopedArrayBuilder<'a, 'c, T: Copy> {
    /// Scope the storage is taken from.
    scope: &'a mut MemoryScope<'c>,
    /// Start of the reserved storage (null while nothing has been reserved).
    ptr: *mut T,
    /// Number of elements written so far.
    len: usize,
    /// Number of elements the reserved storage can hold.
    capacity: usize,
}

impl<'a, 'c, T: Copy> ScopedArrayBuilder<'a, 'c, T> {
    /// Starts a builder that initially reserves whatever is left in the
    /// scope's current page. When nothing is left (or there is no page yet)
    /// the builder starts without storage and allocates on first use.
    pub fn new(scope: &'a mut MemoryScope<'c>) -> Self {
        let (ptr, capacity) = scope.alloc_ptr_remaining::<T>();
        Self { scope, ptr, len: 0, capacity }
    }

    /// Forgets all elements written so far; the reserved storage is kept.
    pub fn clear(&mut self) {
        self.len = 0;
    }

    /// Returns a temporary view of the elements written so far, useful to
    /// evaluate whether this candidate is any good (if not, use `clear()`).
    pub fn as_slice(&mut self) -> &[T] {
        if self.ptr.is_null() {
            // No storage was ever reserved; a slice must not be built from a null pointer.
            return &[];
        }
        // SAFETY: `ptr` is non-null and the first `len` elements have been written.
        unsafe {
            core::slice::from_raw_parts(self.ptr, self.len)
        }
    }

    /// Finishes the builder and returns the elements as a slice that lives as
    /// long as the scope's pool borrow allows. Unused reserved elements are
    /// handed back to the scope.
    // resulting lifetime is min('c, T)
    #[must_use]
    pub fn build<'b>(self) -> &'b mut [T] where T: 'b, 'c: 'b {
        self.scope.unused::<T>(self.capacity - self.len);
        if self.ptr.is_null() {
            // Nothing was reserved and nothing was written.
            return &mut [];
        }
        // SAFETY: `ptr` is non-null and the first `len` elements have been written.
        unsafe {
            core::slice::from_raw_parts_mut(self.ptr, self.len)
        }
    }

    /// Moves the contents to a new page with room for at least `count` elements.
    ///
    /// # Panics
    /// Panics when the scope returns less room than required, instead of
    /// letting later writes run past the page.
    fn alloc(&mut self, count: usize) {
        let (ptr,capacity) = self.scope.add_at_least::<T>(count + self.len);
        assert!(capacity >= count, "ScopedArrayBuilder: scope returned too little room");
        if self.len > 0 {
            // SAFETY: the old storage holds `len` written elements and the new
            // page is a distinct allocation with room for at least `count >= len`.
            unsafe {
                std::ptr::copy_nonoverlapping(self.ptr, ptr, self.len);
            }
        }
        self.ptr = ptr;
        self.capacity = capacity;
    }

    /// Appends one element, growing the storage when it is full.
    pub fn push(&mut self, v: T) {
        if self.len == self.capacity {
            self.alloc(self.len+1);
        }
        // SAFETY: `len < capacity` holds here, so the slot is inside the reservation.
        unsafe {
            self.ptr.add(self.len).write(v);
        }
        self.len += 1;
    }

    /// Appends all elements of `v`, growing the storage when needed.
    pub fn extend_from_slice(&mut self, v: &[T]) {
        if v.is_empty() {
            // Nothing to copy; also avoids pointer arithmetic on an unbacked builder.
            return;
        }
        let needed = self.len + v.len();
        if needed > self.capacity {
            self.alloc(needed);
        }
        // SAFETY: `needed <= capacity`, so the destination range is inside the reservation.
        unsafe {
            std::ptr::copy(v.as_ptr(), self.ptr.add(self.len), v.len());
        }
        self.len = needed;
    }

    /// Number of elements written so far.
    #[must_use]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Whether no element has been written yet.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
}

/// Growable UTF-8 string that is built directly inside a [`MemoryScope`].
///
/// Call `build()` to obtain the finished string; the part of the reservation
/// that was not used is handed back to the scope at that point.
pub struct ScopedStringBuilder<'a, 'c> {
    /// Scope the storage is taken from.
    scope: &'a mut MemoryScope<'c>,
    /// Start of the reserved storage (null while nothing has been reserved).
    ptr: *mut u8,
    /// Number of bytes written so far.
    len: usize,
    /// Number of bytes the reserved storage can hold.
    capacity: usize,
}

impl<'a, 'c> ScopedStringBuilder<'a, 'c> {
    /// Starts a builder that initially reserves whatever is left in the
    /// scope's current page. When nothing is left (or there is no page yet)
    /// the builder starts without storage and allocates on first use.
    pub fn new(scope: &'a mut MemoryScope<'c>) -> Self {
        let (ptr, capacity) = scope.alloc_ptr_remaining::<u8>();
        Self { scope, ptr, len: 0, capacity }
    }

    /// Forgets everything written so far; the reserved storage is kept.
    pub fn clear(&mut self) {
        self.len = 0;
    }

    /// Returns a temporary view of the text written so far, useful to evaluate
    /// whether this candidate is any good (if not, use `clear()`).
    pub fn as_str(&mut self) -> &str {
        if self.ptr.is_null() {
            // No storage was ever reserved; a slice must not be built from a null pointer.
            return "";
        }
        // SAFETY: `ptr` is non-null, `len` bytes have been written, and only
        // complete UTF-8 sequences are ever appended.
        unsafe {
            core::str::from_utf8_unchecked(core::slice::from_raw_parts(self.ptr, self.len))
        }
    }

    /// Finishes the builder and returns the text as a string that lives as
    /// long as the scope's pool borrow allows. Unused reserved bytes are
    /// handed back to the scope.
    #[must_use]
    pub fn build<'b>(self) -> &'b mut str where 'c: 'b {
        self.scope.unused::<u8>(self.capacity - self.len);
        if self.ptr.is_null() {
            // Nothing was reserved and nothing was written.
            // SAFETY: the empty byte slice is valid UTF-8.
            return unsafe { core::str::from_utf8_unchecked_mut(&mut []) };
        }
        // SAFETY: all things appended to this buffer are valid utf8, so no need to check
        unsafe {
            core::str::from_utf8_unchecked_mut(core::slice::from_raw_parts_mut(self.ptr, self.len))
        }
    }

    /// Moves the contents to a new page with room for at least `count` bytes.
    ///
    /// # Panics
    /// Panics when the scope returns less room than required, instead of
    /// letting later writes run past the page.
    fn alloc(&mut self, count: usize) {
        let (ptr,capacity) = self.scope.add_at_least::<u8>(count + self.len);
        assert!(capacity >= count, "ScopedStringBuilder: scope returned too little room");
        if self.len > 0 {
            // SAFETY: the old storage holds `len` written bytes and the new page
            // is a distinct allocation with room for at least `count >= len`.
            unsafe {
                std::ptr::copy_nonoverlapping(self.ptr, ptr, self.len);
            }
        }
        self.ptr = ptr;
        self.capacity = capacity;
    }

    /// Appends one character (UTF-8 encoded), growing the storage when needed.
    pub fn push_char(&mut self, c: char) {
        let encoded_len = c.len_utf8();
        let needed = self.len + encoded_len;
        if needed > self.capacity {
            self.alloc(needed);
        }
        // SAFETY: `needed <= capacity`, so the destination range is inside the reservation.
        let dest = unsafe { core::slice::from_raw_parts_mut(self.ptr.add(self.len), encoded_len) };
        c.encode_utf8(dest);
        self.len = needed;
    }

    /// Number of bytes written so far.
    #[must_use]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Whether nothing has been written yet.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
}

impl<'a, 'c> core::fmt::Write for ScopedStringBuilder<'a, 'c> {
    /// Appends `s` to the builder, growing the storage when needed.
    ///
    /// # Errors
    /// Returns `core::fmt::Error` when the storage could not be grown far
    /// enough to hold `s`.
    fn write_str(&mut self, s: &str) -> core::fmt::Result {
        let bytes = s.as_bytes();
        if bytes.is_empty() {
            // Nothing to copy; also avoids pointer arithmetic on a builder
            // that has no storage yet.
            return Ok(());
        }
        let needed = self.len + bytes.len();
        if needed > self.capacity {
            self.alloc(needed);
            if needed > self.capacity {
                // Never write past the reservation.
                return Err(core::fmt::Error);
            }
        }
        // SAFETY: `needed <= capacity`, so the destination range is inside the reservation.
        unsafe {
            std::ptr::copy(bytes.as_ptr(), self.ptr.add(self.len), bytes.len());
        }
        self.len = needed;
        Ok(())
    }
}

impl<'c> Drop for MemoryScope<'c> {
    /// Restores the pool to the state recorded when this scope was created.
    ///
    /// 1. Delayed functions registered since then are run, newest first.
    /// 2. Pages added since then are freed (`clear` scopes) or reset and kept
    ///    as spare pages (`rewind` scopes).
    /// 3. The fill level of the page that was current at creation is restored.
    fn drop(&mut self) {
        let keep_pages = self.len_content;
        let keep_delayed = self.len_delay_execution;
        let pool = &mut *self.pool;

        // execute delayed functions
        debug_assert!(keep_delayed <= pool.delay_execution.len());
        while pool.delay_execution.len() > keep_delayed {
            if let Some(delayed) = pool.delay_execution.pop() {
                delayed();
            }
        }

        if self.clear {
            pool.content.truncate(keep_pages);
        } else {
            for mut page in pool.content.drain(keep_pages..) {
                page.available = page.size;
                pool.next.push(page);
            }
        }

        if let Some(last) = pool.content.last_mut() {
            last.available = self.len_last_available;
        }
    }
}

#[cfg(test)]
mod tests {
    /// `write!` on a scope resolves to `MemoryScope::write_fmt` and evaluates
    /// to the formatted string.
    #[test]
    fn format() {
        let mut allocator = crate::MemoryPool::new();
        let mut scope = allocator.rewind();
        let s : &str = write!(scope, "foo {}", 42);
        assert_eq!("foo 42", s);
    }

    /// Replays the rounding arithmetic of `ceil_to_power_of_two` inline and
    /// checks that an exact power of two (2048) is left unchanged.
    #[test]
    fn power_of_two() {
        let mut length = 2048usize;
        let next_power_of_two = 1 << (core::mem::size_of::<usize>()*8 - (length.leading_zeros() as usize));
        eprintln!("{} -> {} -> {}", length, next_power_of_two, length & ((next_power_of_two>>1)-1));
        if length & ((next_power_of_two>>1)-1) != 0 {
            length = next_power_of_two;
        }
        assert!(length == 2048);
    }
}