// oxidef_compact1 0.1.0-alpha.1

// Oxidef is an experimental interface definition language and serialisation scheme for efficient and strongly-typed payloads.
// Documentation
use core::ops::{Deref, DerefMut, Range};

pub use bytes::BufMut;
use thiserror::Error;

/// Errors that can be reported while encoding `compact1` data.
#[derive(Clone, Debug, Error, PartialEq, Eq, Hash)]
pub enum EncError {
    /// A value was too large to be represented as a varuint.
    #[error("value did not fit into varuint")]
    VarUintOverflow,

    /// A container was longer than the encoding permits.
    ///
    /// NOTE(review): the actual length limit is not visible in this file.
    #[error("container too long")]
    ContainerTooLong,

    /// An attempt was made to encode a 'future' enum/union variant.
    ///
    /// NOTE(review): presumably a variant that this schema version does not know about — confirm.
    // alternatively we may consider supporting the ability to carry the raw byte encoding
    // but this is prone to evil manipulation, so should really be opt-in
    #[error("attempt to encode 'future' enum/union variant")]
    EncodeFutureVariant,

    /// The value failed validation before encoding (see `encode`).
    ///
    /// Only available when both the `alloc` and `validation` features are enabled.
    #[cfg(all(feature = "alloc", feature = "validation"))]
    #[error("validation error at {path}: [{rule_code}] {rule_message}")]
    Validation {
        /// `Debug`-formatted path reported by the validator.
        path: String,
        /// Code of the validation rule that failed.
        rule_code: &'static str,
        /// Human-readable message of the validation rule that failed.
        rule_message: &'static str,
    },

    /// A float was NaN or infinite where only finite values are allowed.
    #[error("float not finite (either NaN or an infinity); not allowed except for `raw_` floats")]
    FloatNotFinite,
}

/// Encoder for `compact1` data.
///
/// Individual bits can be packed into a shared "bit stash" byte: a placeholder byte is
/// reserved in the buffer when the first bit is written and is rewritten with the
/// accumulated value once the stash is flushed.
///
/// # Correctness
///
/// You MUST call `finish` once finished, to ensure all writes are complete.
/// Otherwise a partially filled bit stash byte is left as its zero placeholder in the buffer.
/// (This is a correctness requirement, not a memory-safety one: no `unsafe` is involved.)
pub struct Encoder<B: ImprovidentBufMut> {
    /// Number of bit positions still free in the current stash byte.
    /// `0` means no stash byte is currently open.
    bit_stash_remaining: u8,
    /// Bits accumulated so far for the current stash byte (filled from the most significant bit).
    bit_stash_value: u8,
    /// Mark covering the placeholder byte in `buf` that will receive `bit_stash_value` on flush.
    bit_stash_mark: Option<B::Mark>,
    /// The underlying buffer that encoded bytes are written to.
    pub buf: B,
}
impl<B: ImprovidentBufMut> Encoder<B> {
    /// Wraps `buf` in a fresh encoder whose bit stash is empty.
    pub fn new(buf: B) -> Self {
        Self {
            buf,
            bit_stash_mark: None,
            bit_stash_value: 0,
            bit_stash_remaining: 0,
        }
    }

    /// Flushes any pending stash bits and hands back the finished buffer.
    pub fn finish(mut self) -> B::Fin {
        self.flush_bit_stash();
        self.buf.finish()
    }

    /// Writes the accumulated stash byte over its placeholder and closes the stash.
    ///
    /// Does nothing when no stash byte is currently open. After a flush, the next
    /// stashed bit starts a new byte, even if the flushed one was only partly filled.
    pub fn flush_bit_stash(&mut self) {
        if let Some(placeholder) = self.bit_stash_mark.take() {
            let stash_byte = [self.bit_stash_value];
            self.buf.rewrite_mark(placeholder, &stash_byte);
            self.bit_stash_remaining = 0;
            self.bit_stash_value = 0;
        }
    }

    /// Appends a single bit to the bit stash.
    ///
    /// Bits fill the stash byte from the most significant bit downwards. A zero
    /// placeholder byte is reserved in the buffer when a new stash byte is opened,
    /// and the stash is flushed automatically once all eight bits are used.
    pub fn write_stash_bit(&mut self, bit: bool) {
        let stash_is_closed = self.bit_stash_remaining == 0;
        if stash_is_closed {
            // Reserve one byte now; its real value is filled in by `flush_bit_stash`.
            let start = self.buf.mark_start();
            self.buf.put_u8(0);
            self.bit_stash_mark = Some(self.buf.mark_end(start));
            self.bit_stash_remaining = 8;
            self.bit_stash_value = 0;
        }

        self.bit_stash_remaining -= 1;
        // `u8::from(bool)` is 0 or 1, so OR-ing is a no-op for a cleared bit.
        self.bit_stash_value |= u8::from(bit) << self.bit_stash_remaining;

        if self.bit_stash_remaining == 0 {
            self.flush_bit_stash();
        }
    }
}

/// A `BufMut` that additionally allows getting a marked position and rewriting a range of bytes there later.
///
/// Implementors dereference (mutably) to their `InnerBuf`, so the ordinary `BufMut`
/// writing methods are reachable through the implementor via auto-deref.
pub trait ImprovidentBufMut: Deref<Target = Self::InnerBuf> + DerefMut {
    /// The underlying `BufMut` that this buffer dereferences to.
    type InnerBuf: BufMut;
    /// Handle for a mark that has been started (returned by `mark_start`).
    type MarkStart: 'static;
    /// Handle for a mark that has been ended (returned by `mark_end`) and can be rewritten.
    type Mark: 'static;
    /// The value produced by `finish`.
    type Fin;

    /// Begin a mark.
    ///
    /// Note: if the mark is not rewritten, it may leak memory.
    fn mark_start(&mut self) -> Self::MarkStart;

    /// End a mark.
    /// Callers may only end the most recently started mark.
    ///
    /// Note: if the mark is not rewritten, it may leak memory.
    fn mark_end(&mut self, start: Self::MarkStart) -> Self::Mark;

    /// Consume a started mark (a `MarkStart`, not a `Mark`) and measure the number of
    /// bytes written since.
    fn mark_to_measure(&mut self, mark: Self::MarkStart) -> usize;

    /// Rewrite the bytes that are contained within a mark.
    ///
    /// Caution for implementor: must support rewrites out-of-order.
    /// Depending on implementation details, this may involve remapping indices.
    fn rewrite_mark(&mut self, mark: Self::Mark, bytes: &[u8]);

    /// Consume the buffer and produce its final value.
    fn finish(self) -> Self::Fin;
}

/// A `Vec<u8>`-backed buffer that tracks marked byte ranges so they can be rewritten later.
///
/// Only available with the `alloc` feature.
#[cfg(feature = "alloc")]
#[derive(Default)]
pub struct ImprovidentVec {
    /// The bytes written so far.
    buf: ::alloc::vec::Vec<u8>,
    /// Marks
    /// Each entry is `(mark_number, byte range within buf)`.
    /// Invariants:
    /// - always sorted by mark_number
    /// - no two mark numbers are the same
    marks: ::alloc::vec::Vec<(u32, Range<usize>)>,
}

/// Gives shared access to the underlying byte vector.
#[cfg(feature = "alloc")]
impl Deref for ImprovidentVec {
    type Target = ::alloc::vec::Vec<u8>;

    /// Returns a shared reference to the underlying byte vector.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        &self.buf
    }
}
/// Gives mutable access to the underlying byte vector.
#[cfg(feature = "alloc")]
impl DerefMut for ImprovidentVec {
    /// Returns a mutable reference to the underlying byte vector.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.buf
    }
}

/// Mark bookkeeping for the `Vec`-backed buffer.
///
/// Both `MarkStart` and `Mark` are the mark's number, which is looked up in the
/// sorted `marks` list by binary search.
#[cfg(feature = "alloc")]
impl ImprovidentBufMut for ImprovidentVec {
    type InnerBuf = ::alloc::vec::Vec<u8>;
    type MarkStart = u32;

    type Mark = u32;
    type Fin = ::alloc::vec::Vec<u8>;

    /// Opens a new, initially empty mark at the current end of the buffer.
    ///
    /// The new mark number is one more than the last live mark's number (or 0 when no
    /// marks are live), which keeps `marks` sorted and free of duplicates.
    ///
    /// # Panics
    ///
    /// Panics if the next mark number would overflow `u32`.
    fn mark_start(&mut self) -> Self::MarkStart {
        let mark_number = self
            .marks
            .last()
            .map(|(mark_number, _)| {
                mark_number
                    .checked_add(1)
                    .expect("mark number overflowed u32")
            })
            .unwrap_or(0);
        let position = self.buf.len();
        self.marks.push((mark_number, position..position));
        mark_number
    }

    /// Closes the most recently started mark at the current end of the buffer.
    ///
    /// # Panics
    ///
    /// Panics if there are no marks, or if `mark_number` is not the last mark in the list.
    fn mark_end(&mut self, mark_number: Self::MarkStart) -> Self::Mark {
        let (last_mark_number, last_mark_range) = self
            .marks
            .last_mut()
            .expect("mark_end called but there are no marks");
        if *last_mark_number != mark_number {
            panic!("mark_end called on non-last-mark");
        }
        last_mark_range.end = self.buf.len();
        mark_number
    }

    /// Removes the mark and returns how many bytes lie between its start and the
    /// current end of the buffer.
    ///
    /// # Panics
    ///
    /// Panics if no mark with this number exists.
    fn mark_to_measure(&mut self, mark_number: Self::MarkStart) -> usize {
        let mark_idx = self
            .marks
            .binary_search_by_key(&mark_number, |(some_mark_number, _)| *some_mark_number)
            .expect("mark_to_measure called on non-existent mark");

        let (_, mark_range) = self.marks.remove(mark_idx);

        self.buf.len() - mark_range.start
    }

    /// Removes the mark and replaces the bytes it covers with `bytes`.
    ///
    /// The replacement may be a different length from the marked range; the ranges of
    /// all marks with a higher number are shifted to compensate.
    ///
    /// # Panics
    ///
    /// Panics if no mark with this number exists.
    fn rewrite_mark(&mut self, mark_number: Self::Mark, bytes: &[u8]) {
        let mark_idx = self
            .marks
            .binary_search_by_key(&mark_number, |(some_mark_number, _)| *some_mark_number)
            .expect("rewrite_mark called on non-existent mark");

        let (_, mark_range) = self.marks.remove(mark_idx);

        let old_size = mark_range.end - mark_range.start;
        let new_size = bytes.len();

        // After the removal, `marks[mark_idx..]` are exactly the marks numbered higher
        // than the one being rewritten.
        if new_size != old_size && mark_idx < self.marks.len() {
            for (_, other_mark_range) in &mut self.marks[mark_idx..] {
                // Add before subtracting so that a shorter replacement does not underflow
                // `usize` (assumes later marks begin at or after this mark's end).
                other_mark_range.start = other_mark_range.start + new_size - old_size;
                other_mark_range.end = other_mark_range.end + new_size - old_size;
            }
        }

        // The splice takes effect when the returned iterator is dropped at the end of
        // this statement.
        self.buf.splice(mark_range, bytes.iter().copied());
    }

    /// Returns the underlying byte vector, discarding any remaining mark bookkeeping.
    fn finish(self) -> Self::Fin {
        self.buf
    }
}

/// Encodes `value` into a freshly allocated byte vector without running validation.
///
/// # Errors
///
/// Returns whatever `EncError` the value's `encode` implementation reports.
// NOTE(review): this is compiled under `any(test, feature = "alloc")`, whereas
// `ImprovidentVec` is only declared under `feature = "alloc"` — a test build without
// `alloc` would presumably fail to compile; confirm the intended gating.
#[cfg(any(test, feature = "alloc"))]
pub fn encode_unvalidated<T>(value: &T) -> Result<Vec<u8>, EncError>
where
    T: crate::codec::Compact1Codec,
{
    let mut enc = Encoder::new(ImprovidentVec::default());
    value.encode(&mut enc)?;
    // `finish` flushes any pending bit stash before yielding the bytes.
    Ok(enc.finish())
}

/// Validates `value` and, if validation passes, encodes it into a byte vector.
///
/// # Errors
///
/// Returns `EncError::Validation` (carrying the `Debug`-formatted path, rule code and
/// rule message) when validation fails, or any error from `encode_unvalidated`.
#[cfg(all(feature = "alloc", feature = "validation"))]
pub fn encode<T>(value: &T) -> Result<Vec<u8>, EncError>
where
    T: crate::codec::Compact1Codec + oxidef_validation::Validate,
{
    // Reject invalid values up front so nothing is encoded for them.
    if let Err(failure) = value.validate() {
        return Err(EncError::Validation {
            path: format!("{:?}", failure.path),
            rule_code: failure.rule_code,
            rule_message: failure.rule_message,
        });
    }

    encode_unvalidated(value)
}

#[cfg(test)]
mod test {
    use bytes::BufMut;

    use super::{ImprovidentBufMut, ImprovidentVec};

    /// Two marked placeholders are rewritten in creation order — the first with a shorter
    /// replacement, the second with a longer one — and the final buffer must contain
    /// both replacements in the right places.
    #[test]
    fn test_mark_rewriting() {
        let mut buffer = ImprovidentVec::default();

        buffer.put_slice(b"hello I am ");

        // First placeholder: will shrink when rewritten.
        let name_mark = {
            let start = buffer.mark_start();
            buffer.put_slice(b"a placeholder");
            buffer.mark_end(start)
        };

        buffer.put_slice(b", how ");

        // Second placeholder: will grow when rewritten, and must have been shifted
        // correctly by the first rewrite.
        let question_mark = {
            let start = buffer.mark_start();
            buffer.put_slice(b"is the weather?");
            buffer.mark_end(start)
        };

        buffer.rewrite_mark(name_mark, b"John");
        buffer.rewrite_mark(
            question_mark,
            b"are you doing on this fine Saturday, my friend?",
        );

        let encoded = buffer.finish();
        assert_eq!(
            encoded,
            b"hello I am John, how are you doing on this fine Saturday, my friend?"
        );
    }
}