#![cfg(feature = "backends")]
use super::Backend;
use crate::{symbol::expect_valid_symbol, DefaultSymbol, Symbol};
use alloc::vec::Vec;
use core::{marker::PhantomData, mem, str};
/// A backend that stores every interned string inside one contiguous byte buffer.
///
/// Each entry is written as a variable-length encoded byte length followed by the
/// string's bytes. A symbol is the byte offset at which an entry's length prefix
/// starts.
#[derive(Debug)]
pub struct BufferBackend<S = DefaultSymbol> {
/// Number of strings pushed into `buffer` so far.
len_strings: usize,
/// Concatenation of all `(length prefix, string bytes)` entries.
buffer: Vec<u8>,
/// Ties the backend to its symbol type `S` without storing a value of it.
marker: PhantomData<fn() -> S>,
}
impl<S> PartialEq for BufferBackend<S>
where
    S: Symbol,
{
    /// Two backends are equal when they hold the same number of strings and their
    /// byte buffers are identical. The phantom marker carries no data and is ignored.
    fn eq(&self, other: &Self) -> bool {
        self.len_strings == other.len_strings && self.buffer == other.buffer
    }
}
// Equality compares only a `usize` and a `Vec<u8>`, so it is a total equivalence relation.
impl<S> Eq for BufferBackend<S> where S: Symbol {}
impl<S> Clone for BufferBackend<S> {
fn clone(&self) -> Self {
Self {
len_strings: self.len_strings,
buffer: self.buffer.clone(),
marker: Default::default(),
}
}
}
impl<S> Default for BufferBackend<S> {
#[cfg_attr(feature = "inline-more", inline)]
fn default() -> Self {
Self {
len_strings: 0,
buffer: Default::default(),
marker: Default::default(),
}
}
}
impl<S> BufferBackend<S>
where
    S: Symbol,
{
    /// Returns the symbol that the next pushed string will receive.
    ///
    /// The symbol is built from the current byte length of the buffer, i.e. the
    /// offset at which the next entry's length prefix will be written.
    /// NOTE(review): `expect_valid_symbol` is defined elsewhere; presumably it panics
    /// when the offset is not representable by `S` — confirm against its definition.
    #[inline]
    fn next_symbol(&self) -> S {
        expect_valid_symbol(self.buffer.len())
    }

    /// Resolves the entry whose length prefix starts at byte offset `index`.
    ///
    /// Returns the string together with the offset just past its last byte (the
    /// start of the following entry), or `None` when:
    ///
    /// - `index` lies outside the buffer,
    /// - no valid length prefix can be decoded at `index`,
    /// - the decoded length reaches past the end of the buffer, or
    /// - the selected bytes are not valid UTF-8.
    ///
    /// This function is reachable from safe code with arbitrary offsets (for example
    /// a symbol that points into the middle of an entry), so every step is checked
    /// and the bytes are validated instead of being trusted.
    fn resolve_index_to_str(&self, index: usize) -> Option<(&str, usize)> {
        let bytes = self.buffer.get(index..)?;
        let (str_len, str_len_bytes) = decode_var_usize(bytes)?;
        // A garbage length prefix can be arbitrarily large; never let the offset
        // arithmetic overflow.
        let index_str = index.checked_add(str_len_bytes)?;
        let index_end = index_str.checked_add(str_len)?;
        let str_bytes = self.buffer.get(index_str..index_end)?;
        // Validate rather than assume: an offset that is not an entry start may
        // select bytes that split a multi-byte character.
        let string = str::from_utf8(str_bytes).ok()?;
        Some((string, index_end))
    }

    /// Resolves the entry whose length prefix starts at byte offset `index` without
    /// performing any bounds or UTF-8 checks.
    ///
    /// # Safety
    ///
    /// `index` must be the offset of the length prefix of an entry that was written
    /// to this backend's buffer by `push_string`. Any other offset results in
    /// out-of-bounds reads or an invalid `str`.
    unsafe fn resolve_index_to_str_unchecked(&self, index: usize) -> &str {
        // SAFETY: the caller guarantees `index` is the start of an entry, hence it is
        // within the buffer.
        let prefixed = unsafe { self.buffer.get_unchecked(index..) };
        // SAFETY: an entry always starts with a complete length prefix.
        let (str_len, str_len_bytes) = unsafe { decode_var_usize_unchecked(prefixed) };
        let start_str = index + str_len_bytes;
        // SAFETY: `push_string` wrote exactly `str_len` bytes right after the prefix.
        let str_bytes = unsafe { self.buffer.get_unchecked(start_str..start_str + str_len) };
        // SAFETY: those bytes were copied verbatim from a `&str`.
        unsafe { str::from_utf8_unchecked(str_bytes) }
    }

    /// Appends the variable-length encoding of `value` to the buffer and returns the
    /// number of bytes written.
    #[inline]
    fn encode_var_usize(&mut self, value: usize) -> usize {
        encode_var_usize(&mut self.buffer, value)
    }

    /// Appends `string` to the buffer as a new entry and returns its symbol.
    ///
    /// No deduplication happens here: every call writes a new entry.
    fn push_string(&mut self, string: &str) -> S {
        // Must be taken before writing: the symbol is the entry's start offset.
        let symbol = self.next_symbol();
        self.encode_var_usize(string.len());
        self.buffer.extend_from_slice(string.as_bytes());
        self.len_strings += 1;
        symbol
    }
}
impl<S> Backend for BufferBackend<S>
where
S: Symbol,
{
type Symbol = S;
#[cfg_attr(feature = "inline-more", inline)]
fn with_capacity(capacity: usize) -> Self {
const LEN_USIZE: usize = mem::size_of::<usize>();
const DEFAULT_STR_LEN: usize = 5;
let bytes_per_string = DEFAULT_STR_LEN + LEN_USIZE;
Self {
len_strings: 0,
buffer: Vec::with_capacity(capacity * bytes_per_string),
marker: Default::default(),
}
}
#[inline]
fn intern(&mut self, string: &str) -> Self::Symbol {
self.push_string(string)
}
#[inline]
fn resolve(&self, symbol: Self::Symbol) -> Option<&str> {
self.resolve_index_to_str(symbol.to_usize())
.map(|(string, _next_str_index)| string)
}
fn shrink_to_fit(&mut self) {
self.buffer.shrink_to_fit();
}
#[inline]
unsafe fn resolve_unchecked(&self, symbol: Self::Symbol) -> &str {
unsafe { self.resolve_index_to_str_unchecked(symbol.to_usize()) }
}
}
/// Appends `value` to `buffer` as a little-endian base-128 varint and returns the
/// number of bytes written.
///
/// Every byte carries seven payload bits, least significant group first; the high
/// bit is set on all bytes except the last one.
#[inline]
fn encode_var_usize(buffer: &mut Vec<u8>, mut value: usize) -> usize {
    let start = buffer.len();
    // Emit continuation bytes for as long as more than seven bits remain.
    while value > 0x7F {
        buffer.push(((value as u8) & 0x7F) | 0x80);
        value >>= 7;
    }
    // The final byte has its high bit clear and terminates the encoding.
    buffer.push(value as u8);
    buffer.len() - start
}
/// Decodes a little-endian base-128 varint from the start of `buffer` without
/// bounds checks.
///
/// Returns the decoded value and the number of bytes it occupied.
///
/// # Safety
///
/// `buffer` must start with a complete encoding: it must be non-empty and contain
/// a terminating byte (high bit clear) before its end.
#[inline]
unsafe fn decode_var_usize_unchecked(buffer: &[u8]) -> (usize, usize) {
    // SAFETY: the caller guarantees at least one byte is present.
    let head = unsafe { *buffer.get_unchecked(0) };
    if head & 0x80 == 0 {
        // Single-byte encoding: the byte is the value.
        return (head as usize, 1);
    }
    let mut value: usize = 0;
    let mut pos = 0;
    loop {
        // SAFETY: the caller guarantees a terminating byte exists, so every byte
        // read before it is in bounds.
        let byte = unsafe { *buffer.get_unchecked(pos) };
        value += ((byte & 0x7F_u8) as usize) << ((pos * 7) as u32);
        pos += 1;
        if byte & 0x80 == 0 {
            return (value, pos);
        }
    }
}
/// Decodes a little-endian base-128 varint from the start of `buffer`.
///
/// Returns the decoded value and the number of bytes it occupied, or `None` when
/// the buffer is empty, ends before a terminating byte (high bit clear) is seen,
/// or encodes a value that does not fit into `usize`.
fn decode_var_usize(buffer: &[u8]) -> Option<(usize, usize)> {
    // Fast path: values up to 0x7F occupy a single byte.
    match buffer.first() {
        Some(&first) if first <= 0x7F_u8 => return Some((first as usize, 1)),
        Some(_) => {}
        None => return None,
    }
    let mut result: usize = 0;
    let mut shift: u32 = 0;
    for (i, &byte) in buffer.iter().enumerate() {
        let chunk = (byte & 0x7F_u8) as usize;
        // `checked_shl` only rejects shift amounts >= the bit width ...
        let shifted = chunk.checked_shl(shift)?;
        // ... so additionally reject payload bits that were shifted out the top.
        if shifted >> shift != chunk {
            return None;
        }
        // Chunks occupy disjoint bit ranges, so OR-ing cannot lose information.
        result |= shifted;
        if (byte & 0x80) == 0 {
            return Some((result, i + 1));
        }
        shift += 7;
    }
    // Ran out of bytes before the terminating byte.
    None
}
#[cfg(test)]
mod tests {
    use super::{decode_var_usize, encode_var_usize};
    #[cfg(not(feature = "std"))]
    use alloc::vec::Vec;

    /// Every value below 2^7 encodes to exactly one byte equal to the value.
    #[test]
    fn encode_var_usize_1_byte_works() {
        let mut buffer = Vec::new();
        for i in 0..2usize.pow(7) {
            buffer.clear();
            assert_eq!(encode_var_usize(&mut buffer, i), 1);
            assert_eq!(buffer, [i as u8]);
            assert_eq!(decode_var_usize(&buffer), Some((i, 1)));
        }
    }

    /// Values in 2^7..2^14 encode to two bytes and round-trip.
    #[test]
    fn encode_var_usize_2_bytes_works() {
        let mut buffer = Vec::new();
        for i in 2usize.pow(7)..2usize.pow(14) {
            buffer.clear();
            assert_eq!(encode_var_usize(&mut buffer, i), 2);
            assert_eq!(buffer, [0x80 | ((i & 0x7F) as u8), (0x7F & (i >> 7) as u8)]);
            assert_eq!(decode_var_usize(&buffer), Some((i, 2)));
        }
    }

    /// Values in 2^14..2^21 encode to three bytes and round-trip.
    #[test]
    #[cfg_attr(any(miri, tarpaulin), ignore)]
    fn encode_var_usize_3_bytes_works() {
        let mut buffer = Vec::new();
        for i in 2usize.pow(14)..2usize.pow(21) {
            buffer.clear();
            assert_eq!(encode_var_usize(&mut buffer, i), 3);
            assert_eq!(
                buffer,
                [
                    0x80 | ((i & 0x7F) as u8),
                    0x80 | (0x7F & (i >> 7) as u8),
                    (0x7F & (i >> 14) as u8),
                ]
            );
            assert_eq!(decode_var_usize(&buffer), Some((i, 3)));
        }
    }

    /// Shared body of the four-byte tests; the full range is split across several
    /// tests below. This is a plain helper, so it carries no test attributes: the
    /// callers decide whether they are ignored.
    fn assert_encode_var_usize_4_bytes(range: core::ops::Range<usize>) {
        let mut buffer = Vec::new();
        for i in range {
            buffer.clear();
            assert_eq!(encode_var_usize(&mut buffer, i), 4);
            assert_eq!(
                buffer,
                [
                    0x80 | ((i & 0x7F) as u8),
                    0x80 | (0x7F & (i >> 7) as u8),
                    0x80 | (0x7F & (i >> 14) as u8),
                    (0x7F & (i >> 21) as u8),
                ]
            );
            assert_eq!(decode_var_usize(&buffer), Some((i, 4)));
        }
    }

    #[test]
    #[cfg_attr(any(miri, tarpaulin), ignore)]
    fn encode_var_usize_4_bytes_01_works() {
        assert_encode_var_usize_4_bytes(2usize.pow(21)..2usize.pow(24));
    }

    #[test]
    #[cfg_attr(any(miri, tarpaulin), ignore)]
    fn encode_var_usize_4_bytes_02_works() {
        assert_encode_var_usize_4_bytes(2usize.pow(24)..2usize.pow(26));
    }

    #[test]
    #[cfg_attr(any(miri, tarpaulin), ignore)]
    fn encode_var_usize_4_bytes_03_works() {
        assert_encode_var_usize_4_bytes(2usize.pow(26)..2usize.pow(27));
    }

    #[test]
    #[cfg_attr(any(miri, tarpaulin), ignore)]
    fn encode_var_usize_4_bytes_04_works() {
        assert_encode_var_usize_4_bytes(2usize.pow(27)..2usize.pow(28));
    }

    /// `u32::MAX` needs five bytes: four full groups plus the top four bits.
    #[test]
    fn encode_var_u32_max_works() {
        let mut buffer = Vec::new();
        let i = u32::MAX as usize;
        assert_eq!(encode_var_usize(&mut buffer, i), 5);
        assert_eq!(buffer, [0xFF, 0xFF, 0xFF, 0xFF, 0x0F]);
        assert_eq!(decode_var_usize(&buffer), Some((i, 5)));
    }

    /// `u64::MAX` needs ten bytes: nine full groups plus the top bit.
    ///
    /// Only meaningful where `usize` is 64 bits wide; on narrower targets the cast
    /// would truncate and the expected encoding would not apply.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn encode_var_u64_max_works() {
        let mut buffer = Vec::new();
        let i = u64::MAX as usize;
        assert_eq!(encode_var_usize(&mut buffer, i), 10);
        assert_eq!(
            buffer,
            [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01]
        );
        assert_eq!(decode_var_usize(&buffer), Some((i, 10)));
    }

    /// Empty input and input lacking a terminating byte must not decode.
    #[test]
    fn decode_var_fail() {
        assert_eq!(decode_var_usize(&[]), None);
        assert_eq!(decode_var_usize(&[0x80]), None);
    }
}
impl<'a, S> IntoIterator for &'a BufferBackend<S>
where
S: Symbol,
{
type Item = (S, &'a str);
type IntoIter = Iter<'a, S>;
#[cfg_attr(feature = "inline-more", inline)]
fn into_iter(self) -> Self::IntoIter {
Self::IntoIter::new(self)
}
}
/// Iterator over the `(symbol, string)` pairs stored in a [`BufferBackend`].
pub struct Iter<'a, S> {
/// The backend whose buffer is being walked.
backend: &'a BufferBackend<S>,
/// Number of pairs returned so far; used to compute the remaining length.
yielded: usize,
/// Byte offset in the backend's buffer of the next entry to read.
current: usize,
}
impl<'a, S> Iter<'a, S> {
    /// Creates an iterator positioned at the first entry of `backend`.
    #[cfg_attr(feature = "inline-more", inline)]
    pub fn new(backend: &'a BufferBackend<S>) -> Self {
        // Start at byte offset zero with nothing yielded yet.
        let (current, yielded) = (0, 0);
        Self {
            backend,
            yielded,
            current,
        }
    }
}
impl<'a, S> Iterator for Iter<'a, S>
where
    S: Symbol,
{
    type Item = (S, &'a str);

    /// The exact number of remaining pairs, as reported by `len`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let exact = self.len();
        (exact, Some(exact))
    }

    /// Reads the entry at the current offset and advances past it.
    ///
    /// Returns `None` once no entry can be read at the current offset, or when the
    /// offset cannot be turned into a symbol of type `S`; in both cases the iterator
    /// state is left untouched.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let offset = self.current;
        let (string, next_string_index) = self.backend.resolve_index_to_str(offset)?;
        let symbol = S::try_from_usize(offset)?;
        self.current = next_string_index;
        self.yielded += 1;
        Some((symbol, string))
    }
}
impl<'a, S> ExactSizeIterator for Iter<'a, S>
where
    S: Symbol,
{
    /// Number of pairs not yet returned: the backend's string count minus the
    /// number already yielded.
    fn len(&self) -> usize {
        let total = self.backend.len_strings;
        total - self.yielded
    }
}