use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::hash::{BuildHasher, Hash};
use std::io::{Read, Write};
use std::marker::PhantomData;
use crate::Error;
use crate::SkipRevisioned;
use crate::optimised::indexed::OFFSET_TABLE_MIN_LEN;
use crate::optimised::indexed::seq_walk::FLAG_INDEXED;
use crate::{DeserializeRevisioned, SerializeRevisioned};
#[doc(hidden)]
pub trait IndexedMapEncoded: Sized {
type Key;
type Value;
fn serialize_indexed_map<W: Write>(&self, w: &mut W) -> Result<(), Error>;
fn deserialize_indexed_map<R: Read>(r: &mut R) -> Result<Self, Error>;
fn skip_indexed_map<R: Read>(r: &mut R) -> Result<(), Error>;
}
impl<K, V> IndexedMapEncoded for BTreeMap<K, V>
where
K: SerializeRevisioned + DeserializeRevisioned + SkipRevisioned + Ord,
V: SerializeRevisioned + DeserializeRevisioned + SkipRevisioned,
{
type Key = K;
type Value = V;
fn serialize_indexed_map<W: Write>(&self, w: &mut W) -> Result<(), Error> {
serialize_indexed_map(self, w)
}
fn deserialize_indexed_map<R: Read>(r: &mut R) -> Result<Self, Error> {
deserialize_indexed_map(r)
}
fn skip_indexed_map<R: Read>(r: &mut R) -> Result<(), Error> {
skip_indexed_map::<K, V, R>(r)
}
}
impl<K, V, S> IndexedMapEncoded for HashMap<K, V, S>
where
K: SerializeRevisioned + DeserializeRevisioned + SkipRevisioned + Hash + Eq,
V: SerializeRevisioned + DeserializeRevisioned + SkipRevisioned,
S: BuildHasher + Default,
{
type Key = K;
type Value = V;
fn serialize_indexed_map<W: Write>(&self, w: &mut W) -> Result<(), Error> {
serialize_indexed_entries(self.iter(), w)
}
fn deserialize_indexed_map<R: Read>(r: &mut R) -> Result<Self, Error> {
let mut flag_buf = [0u8; 1];
r.read_exact(&mut flag_buf).map_err(Error::Io)?;
let flags = flag_buf[0];
let len = read_varint(r)?;
let mut out: HashMap<K, V, S> = HashMap::with_capacity_and_hasher(len, S::default());
if (flags & FLAG_INDEXED) == 0 {
for _ in 0..len {
let k = K::deserialize_revisioned(r)?;
let v = V::deserialize_revisioned(r)?;
out.insert(k, v);
}
return Ok(out);
}
let table_bytes = len.checked_mul(8).ok_or(Error::OptimisedSubReaderOverrun)?;
let mut discard = vec![0u8; table_bytes + 8];
r.read_exact(&mut discard).map_err(Error::Io)?;
let mut keys: Vec<K> = Vec::with_capacity(len);
for _ in 0..len {
keys.push(K::deserialize_revisioned(r)?);
}
let mut values: Vec<V> = Vec::with_capacity(len);
for _ in 0..len {
values.push(V::deserialize_revisioned(r)?);
}
for (k, v) in keys.into_iter().zip(values) {
out.insert(k, v);
}
Ok(out)
}
fn skip_indexed_map<R: Read>(r: &mut R) -> Result<(), Error> {
skip_indexed_map::<K, V, R>(r)
}
}
#[doc(hidden)]
pub trait IndexedSeqEncoded: Sized {
type Item;
fn serialize_indexed_seq<W: Write>(&self, w: &mut W) -> Result<(), Error>;
fn deserialize_indexed_seq<R: Read>(r: &mut R) -> Result<Self, Error>;
fn skip_indexed_seq<R: Read>(r: &mut R) -> Result<(), Error>;
}
impl<T> IndexedSeqEncoded for Vec<T>
where
T: SerializeRevisioned + DeserializeRevisioned + SkipRevisioned,
{
type Item = T;
fn serialize_indexed_seq<W: Write>(&self, w: &mut W) -> Result<(), Error> {
serialize_indexed_seq(self, w)
}
fn deserialize_indexed_seq<R: Read>(r: &mut R) -> Result<Self, Error> {
deserialize_indexed_seq(r)
}
fn skip_indexed_seq<R: Read>(r: &mut R) -> Result<(), Error> {
skip_indexed_seq::<T, R>(r)
}
}
#[doc(hidden)]
pub trait IndexedSetEncoded: Sized {
type Item;
fn serialize_indexed_set<W: Write>(&self, w: &mut W) -> Result<(), Error>;
fn deserialize_indexed_set<R: Read>(r: &mut R) -> Result<Self, Error>;
fn skip_indexed_set<R: Read>(r: &mut R) -> Result<(), Error>;
}
#[doc(hidden)]
pub fn serialize_indexed_set_iter<'a, I, T, W>(items: I, writer: &mut W) -> Result<(), Error>
where
I: IntoIterator<Item = &'a T>,
T: SerializeRevisioned + 'a,
W: Write,
{
let mut bodies: Vec<Vec<u8>> = Vec::new();
for item in items {
let mut b = Vec::new();
item.serialize_revisioned(&mut b)?;
bodies.push(b);
}
bodies.sort();
let len = bodies.len();
if len < OFFSET_TABLE_MIN_LEN {
writer.write_all(&[0u8]).map_err(Error::Io)?;
write_varint(writer, len)?;
for b in &bodies {
writer.write_all(b).map_err(Error::Io)?;
}
return Ok(());
}
writer.write_all(&[FLAG_INDEXED]).map_err(Error::Io)?;
write_varint(writer, len)?;
let mut off = 0u32;
for b in &bodies {
writer.write_all(&off.to_le_bytes()).map_err(Error::Io)?;
off = off.checked_add(b.len() as u32).ok_or_else(|| {
Error::Serialize("indexed set element region exceeds u32::MAX".into())
})?;
}
for b in &bodies {
writer.write_all(b).map_err(Error::Io)?;
}
Ok(())
}
#[doc(hidden)]
pub fn deserialize_indexed_set<S, T, R>(reader: &mut R) -> Result<S, Error>
where
S: FromIterator<T>,
T: DeserializeRevisioned,
R: Read,
{
let v: Vec<T> = deserialize_indexed_seq(reader)?;
Ok(v.into_iter().collect())
}
#[doc(hidden)]
pub fn skip_indexed_set<T, R>(reader: &mut R) -> Result<(), Error>
where
T: SkipRevisioned,
R: Read,
{
skip_indexed_seq::<T, R>(reader)
}
impl<T> IndexedSetEncoded for BTreeSet<T>
where
T: SerializeRevisioned + DeserializeRevisioned + SkipRevisioned + Ord,
{
type Item = T;
fn serialize_indexed_set<W: Write>(&self, w: &mut W) -> Result<(), Error> {
serialize_indexed_set_iter(self.iter(), w)
}
fn deserialize_indexed_set<R: Read>(r: &mut R) -> Result<Self, Error> {
deserialize_indexed_set(r)
}
fn skip_indexed_set<R: Read>(r: &mut R) -> Result<(), Error> {
skip_indexed_set::<T, R>(r)
}
}
impl<T, S> IndexedSetEncoded for HashSet<T, S>
where
T: SerializeRevisioned + DeserializeRevisioned + SkipRevisioned + Hash + Eq,
S: BuildHasher + Default,
{
type Item = T;
fn serialize_indexed_set<W: Write>(&self, w: &mut W) -> Result<(), Error> {
serialize_indexed_set_iter(self.iter(), w)
}
fn deserialize_indexed_set<R: Read>(r: &mut R) -> Result<Self, Error> {
let v: Vec<T> = deserialize_indexed_seq(r)?;
Ok(v.into_iter().collect())
}
fn skip_indexed_set<R: Read>(r: &mut R) -> Result<(), Error> {
skip_indexed_seq::<T, R>(r)
}
}
pub struct IndexedMapView<'r, K, V> {
bytes: std::borrow::Cow<'r, [u8]>,
_marker: PhantomData<fn() -> (K, V)>,
}
impl<'r, K, V> IndexedMapView<'r, K, V> {
#[doc(hidden)]
pub fn new(bytes: std::borrow::Cow<'r, [u8]>) -> Self {
Self {
bytes,
_marker: PhantomData,
}
}
pub fn walker(&self) -> Result<crate::optimised::IndexedMapWalker<'_, K, V>, Error> {
crate::optimised::IndexedMapWalker::from_payload(&self.bytes)
}
pub fn as_bytes(&self) -> &[u8] {
&self.bytes
}
pub fn into_bytes(self) -> std::borrow::Cow<'r, [u8]> {
self.bytes
}
}
pub struct VariantView<'r, T> {
bytes: std::borrow::Cow<'r, [u8]>,
_marker: PhantomData<fn() -> T>,
}
impl<'r, T> VariantView<'r, T> {
#[doc(hidden)]
pub fn new(bytes: std::borrow::Cow<'r, [u8]>) -> Self {
Self {
bytes,
_marker: PhantomData,
}
}
pub fn as_bytes(&self) -> &[u8] {
&self.bytes
}
pub fn into_bytes(self) -> std::borrow::Cow<'r, [u8]> {
self.bytes
}
}
pub struct IndexedSetView<'r, T> {
bytes: std::borrow::Cow<'r, [u8]>,
_marker: PhantomData<fn() -> T>,
}
impl<'r, T> IndexedSetView<'r, T> {
#[doc(hidden)]
pub fn new(bytes: std::borrow::Cow<'r, [u8]>) -> Self {
Self {
bytes,
_marker: PhantomData,
}
}
pub fn walker(&self) -> Result<crate::optimised::IndexedSeqWalker<'_, T>, Error> {
crate::optimised::IndexedSeqWalker::from_payload(&self.bytes)
}
pub fn as_bytes(&self) -> &[u8] {
&self.bytes
}
pub fn into_bytes(self) -> std::borrow::Cow<'r, [u8]> {
self.bytes
}
}
pub struct IndexedSeqView<'r, T> {
bytes: std::borrow::Cow<'r, [u8]>,
_marker: PhantomData<fn() -> T>,
}
impl<'r, T> IndexedSeqView<'r, T> {
#[doc(hidden)]
pub fn new(bytes: std::borrow::Cow<'r, [u8]>) -> Self {
Self {
bytes,
_marker: PhantomData,
}
}
pub fn walker(&self) -> Result<crate::optimised::IndexedSeqWalker<'_, T>, Error> {
crate::optimised::IndexedSeqWalker::from_payload(&self.bytes)
}
pub fn as_bytes(&self) -> &[u8] {
&self.bytes
}
pub fn into_bytes(self) -> std::borrow::Cow<'r, [u8]> {
self.bytes
}
}
#[doc(hidden)]
pub fn serialize_indexed_map<K, V, W: Write>(
map: &BTreeMap<K, V>,
writer: &mut W,
) -> Result<(), Error>
where
K: SerializeRevisioned,
V: SerializeRevisioned,
{
serialize_indexed_entries(map.iter(), writer)
}
#[doc(hidden)]
pub fn serialize_indexed_entries<'a, I, K, V, W>(entries: I, writer: &mut W) -> Result<(), Error>
where
I: IntoIterator<Item = (&'a K, &'a V)>,
K: SerializeRevisioned + 'a,
V: SerializeRevisioned + 'a,
W: Write,
{
let mut pairs: Vec<(Vec<u8>, Vec<u8>)> = Vec::new();
for (k, v) in entries {
let mut kb = Vec::new();
k.serialize_revisioned(&mut kb)?;
let mut vb = Vec::new();
v.serialize_revisioned(&mut vb)?;
pairs.push((kb, vb));
}
pairs.sort_by(|a, b| a.0.cmp(&b.0));
let len = pairs.len();
if len < OFFSET_TABLE_MIN_LEN {
writer.write_all(&[0u8]).map_err(Error::Io)?; write_varint(writer, len)?;
for (kb, vb) in &pairs {
writer.write_all(kb).map_err(Error::Io)?;
writer.write_all(vb).map_err(Error::Io)?;
}
return Ok(());
}
let (keys, vals): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
writer.write_all(&[FLAG_INDEXED]).map_err(Error::Io)?;
write_varint(writer, len)?;
let mut k_off = 0u32;
let mut v_off = 0u32;
let mut k_offsets = Vec::with_capacity(len);
let mut v_offsets = Vec::with_capacity(len);
for (kb, vb) in keys.iter().zip(vals.iter()) {
k_offsets.push(k_off);
v_offsets.push(v_off);
k_off = k_off
.checked_add(kb.len() as u32)
.ok_or_else(|| Error::Serialize("indexed map key region exceeds u32::MAX".into()))?;
v_off = v_off
.checked_add(vb.len() as u32)
.ok_or_else(|| Error::Serialize("indexed map value region exceeds u32::MAX".into()))?;
}
for i in 0..len {
writer.write_all(&k_offsets[i].to_le_bytes()).map_err(Error::Io)?;
writer.write_all(&v_offsets[i].to_le_bytes()).map_err(Error::Io)?;
}
writer.write_all(&k_off.to_le_bytes()).map_err(Error::Io)?;
writer.write_all(&v_off.to_le_bytes()).map_err(Error::Io)?;
for kb in &keys {
writer.write_all(kb).map_err(Error::Io)?;
}
for vb in &vals {
writer.write_all(vb).map_err(Error::Io)?;
}
Ok(())
}
#[doc(hidden)]
pub fn serialize_indexed_seq<T, W: Write>(items: &[T], writer: &mut W) -> Result<(), Error>
where
T: SerializeRevisioned,
{
serialize_indexed_seq_iter(items.iter(), writer)
}
#[doc(hidden)]
pub fn serialize_indexed_seq_iter<'a, I, T, W>(items: I, writer: &mut W) -> Result<(), Error>
where
I: IntoIterator<Item = &'a T>,
T: SerializeRevisioned + 'a,
W: Write,
{
let mut bodies: Vec<Vec<u8>> = Vec::new();
for item in items {
let mut b = Vec::new();
item.serialize_revisioned(&mut b)?;
bodies.push(b);
}
let len = bodies.len();
if len < OFFSET_TABLE_MIN_LEN {
writer.write_all(&[0u8]).map_err(Error::Io)?;
write_varint(writer, len)?;
for b in &bodies {
writer.write_all(b).map_err(Error::Io)?;
}
return Ok(());
}
writer.write_all(&[FLAG_INDEXED]).map_err(Error::Io)?;
write_varint(writer, len)?;
let mut off = 0u32;
for b in &bodies {
writer.write_all(&off.to_le_bytes()).map_err(Error::Io)?;
off = off
.checked_add(b.len() as u32)
.ok_or_else(|| Error::Serialize("indexed seq exceeds u32::MAX".into()))?;
}
for b in &bodies {
writer.write_all(b).map_err(Error::Io)?;
}
Ok(())
}
#[doc(hidden)]
pub fn deserialize_indexed_map<K, V, R: Read>(reader: &mut R) -> Result<BTreeMap<K, V>, Error>
where
K: DeserializeRevisioned + Ord,
V: DeserializeRevisioned,
{
let mut flag_buf = [0u8; 1];
reader.read_exact(&mut flag_buf).map_err(Error::Io)?;
let flags = flag_buf[0];
let len = read_varint(reader)?;
if (flags & FLAG_INDEXED) == 0 {
let mut out = BTreeMap::new();
for _ in 0..len {
let k = K::deserialize_revisioned(reader)?;
let v = V::deserialize_revisioned(reader)?;
out.insert(k, v);
}
return Ok(out);
}
let table_bytes = len.checked_mul(8).ok_or(Error::OptimisedSubReaderOverrun)?;
let mut discard = vec![0u8; table_bytes + 8];
reader.read_exact(&mut discard).map_err(Error::Io)?;
let mut keys: Vec<K> = Vec::with_capacity(len);
for _ in 0..len {
keys.push(K::deserialize_revisioned(reader)?);
}
let mut values: Vec<V> = Vec::with_capacity(len);
for _ in 0..len {
values.push(V::deserialize_revisioned(reader)?);
}
let mut out = BTreeMap::new();
for (k, v) in keys.into_iter().zip(values) {
out.insert(k, v);
}
Ok(out)
}
#[doc(hidden)]
pub fn deserialize_indexed_seq<T, R: Read>(reader: &mut R) -> Result<Vec<T>, Error>
where
T: DeserializeRevisioned,
{
let mut flag_buf = [0u8; 1];
reader.read_exact(&mut flag_buf).map_err(Error::Io)?;
let flags = flag_buf[0];
let len = read_varint(reader)?;
if (flags & FLAG_INDEXED) == 0 {
let mut out = Vec::with_capacity(len);
for _ in 0..len {
out.push(T::deserialize_revisioned(reader)?);
}
return Ok(out);
}
let table_bytes = len.checked_mul(4).ok_or(Error::OptimisedSubReaderOverrun)?;
let mut discard = vec![0u8; table_bytes];
reader.read_exact(&mut discard).map_err(Error::Io)?;
let mut out = Vec::with_capacity(len);
for _ in 0..len {
out.push(T::deserialize_revisioned(reader)?);
}
Ok(out)
}
#[doc(hidden)]
pub fn skip_indexed_map<K, V, R: Read>(reader: &mut R) -> Result<(), Error>
where
K: SkipRevisioned,
V: SkipRevisioned,
{
let mut flag_buf = [0u8; 1];
reader.read_exact(&mut flag_buf).map_err(Error::Io)?;
let flags = flag_buf[0];
let len = read_varint(reader)?;
if (flags & FLAG_INDEXED) == 0 {
for _ in 0..len {
K::skip_revisioned(reader)?;
V::skip_revisioned(reader)?;
}
return Ok(());
}
let table_bytes = len.checked_mul(8).ok_or(Error::OptimisedSubReaderOverrun)?;
let mut discard = vec![0u8; table_bytes + 8];
reader.read_exact(&mut discard).map_err(Error::Io)?;
for _ in 0..len {
K::skip_revisioned(reader)?;
}
for _ in 0..len {
V::skip_revisioned(reader)?;
}
Ok(())
}
#[doc(hidden)]
pub fn skip_indexed_seq<T, R: Read>(reader: &mut R) -> Result<(), Error>
where
T: SkipRevisioned,
{
let mut flag_buf = [0u8; 1];
reader.read_exact(&mut flag_buf).map_err(Error::Io)?;
let flags = flag_buf[0];
let len = read_varint(reader)?;
if (flags & FLAG_INDEXED) == 0 {
for _ in 0..len {
T::skip_revisioned(reader)?;
}
return Ok(());
}
let table_bytes = len.checked_mul(4).ok_or(Error::OptimisedSubReaderOverrun)?;
let mut discard = vec![0u8; table_bytes];
reader.read_exact(&mut discard).map_err(Error::Io)?;
for _ in 0..len {
T::skip_revisioned(reader)?;
}
Ok(())
}
#[doc(hidden)]
fn read_varint<R: Read>(r: &mut R) -> Result<usize, Error> {
let mut tag_buf = [0u8; 1];
r.read_exact(&mut tag_buf).map_err(Error::Io)?;
let tag = tag_buf[0];
match tag {
0..=250 => Ok(tag as usize),
251 => {
let mut b = [0u8; 2];
r.read_exact(&mut b).map_err(Error::Io)?;
Ok(u16::from_le_bytes(b) as usize)
}
252 => {
let mut b = [0u8; 4];
r.read_exact(&mut b).map_err(Error::Io)?;
Ok(u32::from_le_bytes(b) as usize)
}
253 => {
let mut b = [0u8; 8];
r.read_exact(&mut b).map_err(Error::Io)?;
let v = u64::from_le_bytes(b);
usize::try_from(v).map_err(|_| Error::IntegerOverflow)
}
_ => Err(Error::InvalidIntegerEncoding),
}
}
#[doc(hidden)]
fn write_varint<W: Write>(w: &mut W, v: usize) -> Result<(), Error> {
if v <= 250 {
w.write_all(&[v as u8]).map_err(Error::Io)
} else if v <= u16::MAX as usize {
w.write_all(&[251]).map_err(Error::Io)?;
w.write_all(&(v as u16).to_le_bytes()).map_err(Error::Io)
} else if v <= u32::MAX as usize {
w.write_all(&[252]).map_err(Error::Io)?;
w.write_all(&(v as u32).to_le_bytes()).map_err(Error::Io)
} else {
w.write_all(&[253]).map_err(Error::Io)?;
w.write_all(&(v as u64).to_le_bytes()).map_err(Error::Io)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::optimised::{IndexedMapWalker, IndexedSeqWalker};
#[test]
fn round_trip_indexed_map_of_strings_below_threshold_uses_legacy_body() {
let mut map: BTreeMap<String, u32> = BTreeMap::new();
map.insert("alpha".into(), 1);
map.insert("bravo".into(), 2);
map.insert("charlie".into(), 3);
let mut bytes = Vec::new();
serialize_indexed_map(&map, &mut bytes).unwrap();
let walker: IndexedMapWalker<String, u32> = IndexedMapWalker::from_payload(&bytes).unwrap();
assert!(!walker.is_indexed(), "3 < threshold: should use legacy body");
assert_eq!(walker.len(), 3);
}
#[test]
fn round_trip_indexed_map_at_threshold_emits_offset_table() {
let mut map: BTreeMap<String, u32> = BTreeMap::new();
for (i, s) in ["a", "b", "c", "d", "e", "f", "g", "h"].iter().enumerate() {
map.insert(s.to_string(), i as u32);
}
let mut bytes = Vec::new();
serialize_indexed_map(&map, &mut bytes).unwrap();
let walker: IndexedMapWalker<String, u32> = IndexedMapWalker::from_payload(&bytes).unwrap();
assert!(walker.is_indexed());
assert_eq!(walker.len(), 8);
let entries: Vec<(&[u8], &[u8])> = walker.entries().unwrap().collect();
assert_eq!(entries.len(), 8);
}
#[test]
fn round_trip_indexed_seq_below_threshold_uses_legacy_body() {
let items: Vec<u32> = vec![10, 20, 30];
let mut bytes = Vec::new();
serialize_indexed_seq(&items, &mut bytes).unwrap();
let walker: IndexedSeqWalker<u32> = IndexedSeqWalker::from_payload(&bytes).unwrap();
assert!(!walker.is_indexed(), "3 < threshold: legacy body");
assert_eq!(walker.len(), 3);
}
#[test]
fn round_trip_indexed_seq_at_threshold_emits_offset_table() {
let items: Vec<u32> = (0u32..8).collect();
let mut bytes = Vec::new();
serialize_indexed_seq(&items, &mut bytes).unwrap();
let walker: IndexedSeqWalker<u32> = IndexedSeqWalker::from_payload(&bytes).unwrap();
assert!(walker.is_indexed());
assert_eq!(walker.len(), 8);
}
#[test]
fn serialize_then_deserialize_indexed_map_round_trips() {
let mut original: BTreeMap<String, u32> = BTreeMap::new();
for (i, s) in ["alpha", "bravo", "charlie", "delta"].iter().enumerate() {
original.insert(s.to_string(), i as u32);
}
let mut bytes = Vec::new();
serialize_indexed_map(&original, &mut bytes).unwrap();
let mut r: &[u8] = &bytes;
let decoded: BTreeMap<String, u32> = deserialize_indexed_map(&mut r).unwrap();
assert_eq!(decoded, original);
assert!(r.is_empty(), "deserialize should consume the whole input");
}
#[test]
fn serialize_then_deserialize_indexed_seq_round_trips() {
let original: Vec<u32> = vec![10, 20, 30, 40, 50];
let mut bytes = Vec::new();
serialize_indexed_seq(&original, &mut bytes).unwrap();
let mut r: &[u8] = &bytes;
let decoded: Vec<u32> = deserialize_indexed_seq(&mut r).unwrap();
assert_eq!(decoded, original);
}
#[test]
fn indexed_map_walker_finds_serialised_key() {
let mut map: BTreeMap<u32, u32> = BTreeMap::new();
for i in 0u32..16 {
map.insert(i, i * 10);
}
let mut bytes = Vec::new();
serialize_indexed_map(&map, &mut bytes).unwrap();
let walker: IndexedMapWalker<u32, u32> = IndexedMapWalker::from_payload(&bytes).unwrap();
let target_key = 7u32;
let mut target_bytes = Vec::new();
target_key.serialize_revisioned(&mut target_bytes).unwrap();
let value_bytes = walker
.find_value_bytes(|k| k.cmp(target_bytes.as_slice()))
.unwrap()
.expect("key 7 should be present");
let mut r: &[u8] = value_bytes;
use crate::DeserializeRevisioned;
let v: u32 = u32::deserialize_revisioned(&mut r).unwrap();
assert_eq!(v, 70);
}
}