use std::ops::Range;
use deepsize::DeepSizeOf;
#[derive(Debug, Clone, PartialEq, Eq, DeepSizeOf)]
pub enum EncodedU64Array {
U16 { base: u64, offsets: Vec<u16> },
U32 { base: u64, offsets: Vec<u32> },
U64(Vec<u64>),
}
impl EncodedU64Array {
pub fn len(&self) -> usize {
match self {
Self::U16 { offsets, .. } => offsets.len(),
Self::U32 { offsets, .. } => offsets.len(),
Self::U64(values) => values.len(),
}
}
pub fn iter(&self) -> Box<dyn DoubleEndedIterator<Item = u64> + '_> {
match self {
Self::U16 { base, offsets } => {
Box::new(offsets.iter().cloned().map(move |o| base + o as u64))
}
Self::U32 { base, offsets } => {
Box::new(offsets.iter().cloned().map(move |o| base + o as u64))
}
Self::U64(values) => Box::new(values.iter().cloned()),
}
}
pub fn get(&self, i: usize) -> Option<u64> {
match self {
Self::U16 { base, offsets } => {
if i < offsets.len() {
Some(*base + offsets[i] as u64)
} else {
None
}
}
Self::U32 { base, offsets } => {
if i < offsets.len() {
Some(*base + offsets[i] as u64)
} else {
None
}
}
Self::U64(values) => values.get(i).copied(),
}
}
pub fn min(&self) -> Option<u64> {
match self {
Self::U16 { base, offsets } => {
if offsets.is_empty() {
None
} else {
Some(*base)
}
}
Self::U32 { base, offsets } => {
if offsets.is_empty() {
None
} else {
Some(*base)
}
}
Self::U64(values) => values.iter().copied().min(),
}
}
pub fn max(&self) -> Option<u64> {
match self {
Self::U16 { base, offsets } => {
if offsets.is_empty() {
None
} else {
Some(*base + offsets.iter().copied().max().unwrap() as u64)
}
}
Self::U32 { base, offsets } => {
if offsets.is_empty() {
None
} else {
Some(*base + offsets.iter().copied().max().unwrap() as u64)
}
}
Self::U64(values) => values.iter().copied().max(),
}
}
pub fn first(&self) -> Option<u64> {
match self {
Self::U16 { base, offsets } => {
if offsets.is_empty() {
None
} else {
Some(*base + *offsets.first().unwrap() as u64)
}
}
Self::U32 { base, offsets } => {
if offsets.is_empty() {
None
} else {
Some(*base + *offsets.first().unwrap() as u64)
}
}
Self::U64(values) => values.first().copied(),
}
}
pub fn last(&self) -> Option<u64> {
match self {
Self::U16 { base, offsets } => {
if offsets.is_empty() {
None
} else {
Some(*base + *offsets.last().unwrap() as u64)
}
}
Self::U32 { base, offsets } => {
if offsets.is_empty() {
None
} else {
Some(*base + *offsets.last().unwrap() as u64)
}
}
Self::U64(values) => values.last().copied(),
}
}
pub fn binary_search(&self, val: u64) -> std::result::Result<usize, usize> {
match self {
Self::U16 { base, offsets } => match val.checked_sub(*base) {
None => Err(0),
Some(val) => {
if val > u16::MAX as u64 {
return Err(offsets.len());
}
let u16 = val as u16;
offsets.binary_search(&u16)
}
},
Self::U32 { base, offsets } => match val.checked_sub(*base) {
None => Err(0),
Some(val) => {
if val > u32::MAX as u64 {
return Err(offsets.len());
}
let u32 = val as u32;
offsets.binary_search(&u32)
}
},
Self::U64(values) => values.binary_search(&val),
}
}
pub fn slice(&self, offset: usize, len: usize) -> Self {
match self {
Self::U16 { base, offsets } => offsets[offset..(offset + len)]
.iter()
.map(|o| *base + *o as u64)
.collect(),
Self::U32 { base, offsets } => offsets[offset..(offset + len)]
.iter()
.map(|o| *base + *o as u64)
.collect(),
Self::U64(values) => {
let values = values[offset..(offset + len)].to_vec();
Self::U64(values)
}
}
}
}
impl From<Vec<u64>> for EncodedU64Array {
fn from(values: Vec<u64>) -> Self {
let min = values.iter().copied().min().unwrap_or(0);
let max = values.iter().copied().max().unwrap_or(0);
let range = max - min;
if values.is_empty() {
Self::U64(Vec::new())
} else if range <= u16::MAX as u64 {
let base = min;
let offsets = values.iter().map(|v| (*v - base) as u16).collect();
Self::U16 { base, offsets }
} else if range <= u32::MAX as u64 {
let base = min;
let offsets = values.iter().map(|v| (*v - base) as u32).collect();
Self::U32 { base, offsets }
} else {
Self::U64(values)
}
}
}
impl From<Range<u64>> for EncodedU64Array {
fn from(range: Range<u64>) -> Self {
let min = range.start;
let max = range.end;
let range = max - min;
if range < u16::MAX as u64 {
let base = min;
let offsets = (0..range as u16).collect();
Self::U16 { base, offsets }
} else if range < u32::MAX as u64 {
let base = min;
let offsets = (0..range as u32).collect();
Self::U32 { base, offsets }
} else {
Self::U64((min..max).collect())
}
}
}
impl FromIterator<u64> for EncodedU64Array {
fn from_iter<I: IntoIterator<Item = u64>>(iter: I) -> Self {
let values: Vec<u64> = iter.into_iter().collect();
Self::from(values)
}
}
impl IntoIterator for EncodedU64Array {
type Item = u64;
type IntoIter = Box<dyn DoubleEndedIterator<Item = u64>>;
fn into_iter(self) -> Self::IntoIter {
match self {
Self::U16 { base, offsets } => {
Box::new(offsets.into_iter().map(move |o| base + o as u64))
}
Self::U32 { base, offsets } => {
Box::new(offsets.into_iter().map(move |o| base + o as u64))
}
Self::U64(values) => Box::new(values.into_iter()),
}
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_encoded_array_from_vec() {
fn roundtrip_array(values: Vec<u64>, expected: &EncodedU64Array) {
let encoded = EncodedU64Array::from(values.clone());
assert_eq!(&encoded, expected);
assert_eq!(values.len(), encoded.len());
assert_eq!(values.first(), encoded.first().as_ref());
assert_eq!(values.last(), encoded.last().as_ref());
assert_eq!(values.iter().min(), encoded.min().as_ref());
assert_eq!(values.iter().max(), encoded.max().as_ref());
let roundtripped = encoded.iter().collect::<Vec<_>>();
assert_eq!(values, roundtripped);
for (i, v) in values.iter().enumerate() {
assert_eq!(Some(*v), encoded.get(i));
}
let encoded2 = values.into_iter().collect::<EncodedU64Array>();
assert_eq!(&encoded2, expected);
}
roundtrip_array(vec![], &EncodedU64Array::U64(vec![]));
roundtrip_array(
vec![42],
&EncodedU64Array::U16 {
base: 42,
offsets: vec![0],
},
);
let relative_values = [42, 0, 43, u16::MAX as u64, 99];
let values = relative_values.map(|v| v + 2 * u16::MAX as u64).to_vec();
let expected = EncodedU64Array::U16 {
base: 2 * u16::MAX as u64,
offsets: relative_values.iter().map(|v| *v as u16).collect(),
};
roundtrip_array(values, &expected);
let relative_values = [42, 0, 43, u32::MAX as u64, 99];
let values = relative_values.map(|v| v + 2 * u32::MAX as u64).to_vec();
let expected = EncodedU64Array::U32 {
base: 2 * u32::MAX as u64,
offsets: relative_values.iter().map(|v| *v as u32).collect(),
};
roundtrip_array(values, &expected);
let values = [42, 0, 43, u64::MAX, 99].to_vec();
let expected = EncodedU64Array::U64(values.clone());
roundtrip_array(values, &expected);
}
#[test]
fn test_double_ended_iter() {
let arrays = vec![
EncodedU64Array::U16 {
base: 42,
offsets: vec![0, 1, 2, 3, 4],
},
EncodedU64Array::U32 {
base: 42,
offsets: vec![0, 1, 2, 3, 4],
},
EncodedU64Array::U64(vec![42, 43, 44, 45, 46]),
];
for array in arrays {
let forwards = array.iter().collect::<Vec<_>>();
let mut backwards = array.iter().rev().collect::<Vec<_>>();
backwards.reverse();
assert_eq!(forwards, backwards);
let mut expected = Vec::with_capacity(array.len());
let mut actual = Vec::with_capacity(array.len());
let mut iter = array.iter();
for i in 0..array.len() {
if i % 2 == 0 {
actual.push(iter.next().unwrap());
expected.push(array.get(i / 2).unwrap());
} else {
let i = array.len() - 1 - i / 2;
actual.push(iter.next_back().unwrap());
expected.push(array.get(i).unwrap());
};
}
assert_eq!(expected, actual);
}
}
#[test]
fn test_encoded_array_from_range() {
let range = (2 * u16::MAX as u64)..(40 + 2 * u16::MAX as u64);
let encoded = EncodedU64Array::from(range.clone());
let expected_base = 2 * u16::MAX as u64;
assert!(
matches!(
encoded,
EncodedU64Array::U16 {
base,
..
} if base == expected_base
),
"{:?}",
encoded
);
let roundtripped = encoded.into_iter().collect::<Vec<_>>();
assert_eq!(range.collect::<Vec<_>>(), roundtripped);
let range = (2 * u32::MAX as u64)..(u16::MAX as u64 + 10 + 2 * u32::MAX as u64);
let encoded = EncodedU64Array::from(range.clone());
let expected_base = 2 * u32::MAX as u64;
assert!(matches!(
encoded,
EncodedU64Array::U32 {
base,
..
} if base == expected_base
));
let roundtripped = encoded.into_iter().collect::<Vec<_>>();
assert_eq!(range.collect::<Vec<_>>(), roundtripped);
let range = 42..42;
let encoded = EncodedU64Array::from(range);
assert_eq!(encoded.len(), 0);
}
}