use std::sync::atomic::{AtomicU64, Ordering};
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ValueTag {
Null = 0,
Bool = 1,
SmallInt = 2,
InlineString = 3,
HeapString = 4,
InlineBytes = 5,
HeapBytes = 6,
Float = 7,
}
impl ValueTag {
fn from_u8(v: u8) -> Option<Self> {
match v {
0 => Some(Self::Null),
1 => Some(Self::Bool),
2 => Some(Self::SmallInt),
3 => Some(Self::InlineString),
4 => Some(Self::HeapString),
5 => Some(Self::InlineBytes),
6 => Some(Self::HeapBytes),
7 => Some(Self::Float),
_ => None,
}
}
}
const MAX_INLINE_SIZE: usize = 7;
const TAG_SHIFT: u32 = 61;
const INLINE_LEN_SHIFT: u32 = 56;
const INLINE_LEN_MASK: u64 = 0x0F;
const INLINE_DATA_MASK: u64 = 0x00FF_FFFF_FFFF_FFFF;
#[derive(Clone)]
pub struct PolymorphicValue {
bits: u64,
heap: Option<Box<[u8]>>,
}
impl PolymorphicValue {
#[inline]
pub fn null() -> Self {
Self {
bits: (ValueTag::Null as u64) << TAG_SHIFT,
heap: None,
}
}
#[inline]
pub fn bool(v: bool) -> Self {
let bits = ((ValueTag::Bool as u64) << TAG_SHIFT) | (v as u64);
Self { bits, heap: None }
}
#[inline]
pub fn int(v: i64) -> Self {
if v >= -(1i64 << 55) && v < (1i64 << 55) {
let bits = ((ValueTag::SmallInt as u64) << TAG_SHIFT) | ((v as u64) & INLINE_DATA_MASK);
Self { bits, heap: None }
} else {
let bytes = v.to_le_bytes();
Self::heap_bytes(&bytes, ValueTag::HeapBytes)
}
}
pub fn string(s: &str) -> Self {
let bytes = s.as_bytes();
if bytes.len() <= MAX_INLINE_SIZE {
Self::inline_bytes(bytes, ValueTag::InlineString)
} else {
Self::heap_bytes(bytes, ValueTag::HeapString)
}
}
pub fn bytes(b: &[u8]) -> Self {
if b.len() <= MAX_INLINE_SIZE {
Self::inline_bytes(b, ValueTag::InlineBytes)
} else {
Self::heap_bytes(b, ValueTag::HeapBytes)
}
}
#[inline]
pub fn float(v: f64) -> Self {
let bits = v.to_bits();
let packed = ((ValueTag::Float as u64) << TAG_SHIFT) | (bits & INLINE_DATA_MASK);
Self {
bits: packed,
heap: Some(Box::new(bits.to_le_bytes())),
}
}
fn inline_bytes(data: &[u8], tag: ValueTag) -> Self {
debug_assert!(data.len() <= MAX_INLINE_SIZE);
let mut packed = 0u64;
for (i, &byte) in data.iter().enumerate() {
packed |= (byte as u64) << (i * 8);
}
let bits = ((tag as u64) << TAG_SHIFT) | ((data.len() as u64) << INLINE_LEN_SHIFT) | packed;
Self { bits, heap: None }
}
fn heap_bytes(data: &[u8], tag: ValueTag) -> Self {
let heap = data.to_vec().into_boxed_slice();
let bits = ((tag as u64) << TAG_SHIFT) | (data.len() as u64 & 0x1FFF_FFFF);
Self {
bits,
heap: Some(heap),
}
}
#[inline]
pub fn tag(&self) -> ValueTag {
ValueTag::from_u8((self.bits >> TAG_SHIFT) as u8 & 0x07).unwrap_or(ValueTag::Null)
}
#[inline]
pub fn is_null(&self) -> bool {
self.tag() == ValueTag::Null
}
#[inline]
pub fn is_inline(&self) -> bool {
matches!(
self.tag(),
ValueTag::Null
| ValueTag::Bool
| ValueTag::SmallInt
| ValueTag::InlineString
| ValueTag::InlineBytes
)
}
#[inline]
pub fn as_bool(&self) -> Option<bool> {
if self.tag() == ValueTag::Bool {
Some((self.bits & 1) != 0)
} else {
None
}
}
#[inline]
pub fn as_int(&self) -> Option<i64> {
if self.tag() == ValueTag::SmallInt {
let raw = (self.bits & INLINE_DATA_MASK) as i64;
let sign_bit = 1i64 << 55;
Some(if raw & sign_bit != 0 {
raw | !INLINE_DATA_MASK as i64
} else {
raw
})
} else {
None
}
}
pub fn as_float(&self) -> Option<f64> {
if self.tag() == ValueTag::Float {
self.heap.as_ref().map(|h| {
let bytes: [u8; 8] = h.as_ref().try_into().unwrap_or([0; 8]);
f64::from_le_bytes(bytes)
})
} else {
None
}
}
#[inline]
fn inline_len(&self) -> usize {
((self.bits >> INLINE_LEN_SHIFT) & INLINE_LEN_MASK) as usize
}
pub fn as_str(&self) -> Option<String> {
match self.tag() {
ValueTag::InlineString => self
.inline_bytes_copy()
.and_then(|bytes| String::from_utf8(bytes).ok()),
ValueTag::HeapString => self
.heap
.as_ref()
.and_then(|h| std::str::from_utf8(h).ok().map(|s| s.to_owned())),
_ => None,
}
}
pub fn as_heap_str(&self) -> Option<&str> {
match self.tag() {
ValueTag::HeapString => self.heap.as_ref().and_then(|h| std::str::from_utf8(h).ok()),
_ => None,
}
}
pub fn as_bytes(&self) -> Option<&[u8]> {
match self.tag() {
ValueTag::InlineBytes => {
None
}
ValueTag::HeapBytes => self.heap.as_ref().map(|h| h.as_ref()),
_ => None,
}
}
pub fn inline_bytes_copy(&self) -> Option<Vec<u8>> {
match self.tag() {
ValueTag::InlineBytes | ValueTag::InlineString => {
let len = self.inline_len();
let bytes = (self.bits & INLINE_DATA_MASK).to_le_bytes();
Some(bytes[..len].to_vec())
}
_ => None,
}
}
pub fn encoded_size(&self) -> usize {
8 + self.heap.as_ref().map(|h| h.len()).unwrap_or(0)
}
pub fn to_bytes(&self) -> Vec<u8> {
let mut buf = Vec::with_capacity(self.encoded_size());
buf.extend_from_slice(&self.bits.to_le_bytes());
if let Some(ref heap) = self.heap {
buf.extend_from_slice(heap);
}
buf
}
pub fn from_bytes(data: &[u8]) -> Option<Self> {
if data.len() < 8 {
return None;
}
let bits = u64::from_le_bytes(data[..8].try_into().ok()?);
let tag = ValueTag::from_u8((bits >> TAG_SHIFT) as u8 & 0x07)?;
let heap = match tag {
ValueTag::HeapString | ValueTag::HeapBytes => {
let len = (bits & 0x1FFF_FFFF) as usize;
if data.len() < 8 + len {
return None;
}
Some(data[8..8 + len].to_vec().into_boxed_slice())
}
ValueTag::Float => {
if data.len() >= 16 {
Some(data[8..16].to_vec().into_boxed_slice())
} else {
None
}
}
_ => None,
};
Some(Self { bits, heap })
}
}
impl Default for PolymorphicValue {
fn default() -> Self {
Self::null()
}
}
impl std::fmt::Debug for PolymorphicValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.tag() {
ValueTag::Null => write!(f, "Null"),
ValueTag::Bool => write!(f, "Bool({})", self.as_bool().unwrap_or(false)),
ValueTag::SmallInt => write!(f, "Int({})", self.as_int().unwrap_or(0)),
ValueTag::InlineString | ValueTag::HeapString => {
if let Some(s) = self.as_str() {
write!(f, "String({:?})", s)
} else if let Some(b) = self.inline_bytes_copy() {
write!(f, "String({:?})", String::from_utf8_lossy(&b))
} else {
write!(f, "String(<invalid>)")
}
}
ValueTag::InlineBytes | ValueTag::HeapBytes => {
write!(f, "Bytes({} bytes)", self.inline_len())
}
ValueTag::Float => write!(f, "Float({:?})", self.as_float()),
}
}
}
pub struct AtomicPolymorphicValue {
bits: AtomicU64,
}
impl AtomicPolymorphicValue {
pub fn null() -> Self {
Self {
bits: AtomicU64::new((ValueTag::Null as u64) << TAG_SHIFT),
}
}
pub fn from_inline(v: &PolymorphicValue) -> Option<Self> {
if v.is_inline() {
Some(Self {
bits: AtomicU64::new(v.bits),
})
} else {
None
}
}
#[inline]
pub fn load(&self, order: Ordering) -> u64 {
self.bits.load(order)
}
#[inline]
pub fn store(&self, value: &PolymorphicValue, order: Ordering) {
debug_assert!(
value.is_inline(),
"AtomicPolymorphicValue only supports inline values"
);
self.bits.store(value.bits, order);
}
#[inline]
pub fn compare_exchange(
&self,
current: &PolymorphicValue,
new: &PolymorphicValue,
success: Ordering,
failure: Ordering,
) -> Result<u64, u64> {
debug_assert!(
new.is_inline(),
"AtomicPolymorphicValue only supports inline values"
);
self.bits
.compare_exchange(current.bits, new.bits, success, failure)
}
#[inline]
pub fn tag(&self) -> ValueTag {
let bits = self.bits.load(Ordering::Relaxed);
ValueTag::from_u8((bits >> TAG_SHIFT) as u8 & 0x07).unwrap_or(ValueTag::Null)
}
#[inline]
pub fn try_as_int(&self, order: Ordering) -> Option<i64> {
let bits = self.bits.load(order);
let tag = ValueTag::from_u8((bits >> TAG_SHIFT) as u8 & 0x07)?;
if tag == ValueTag::SmallInt {
let raw = (bits & INLINE_DATA_MASK) as i64;
let sign_bit = 1i64 << 55;
Some(if raw & sign_bit != 0 {
raw | !INLINE_DATA_MASK as i64
} else {
raw
})
} else {
None
}
}
pub fn fetch_add(&self, delta: i64, order: Ordering) -> Option<i64> {
loop {
let bits = self.bits.load(Ordering::Acquire);
let tag = ValueTag::from_u8((bits >> TAG_SHIFT) as u8 & 0x07)?;
if tag != ValueTag::SmallInt {
return None;
}
let current = {
let raw = (bits & INLINE_DATA_MASK) as i64;
let sign_bit = 1i64 << 55;
if raw & sign_bit != 0 {
raw | !INLINE_DATA_MASK as i64
} else {
raw
}
};
let new_value = current.wrapping_add(delta);
let new_bits = ((ValueTag::SmallInt as u64) << TAG_SHIFT)
| ((new_value as u64) & INLINE_DATA_MASK);
if self
.bits
.compare_exchange_weak(bits, new_bits, order, Ordering::Relaxed)
.is_ok()
{
return Some(current);
}
}
}
}
pub struct CompressedValueArray {
data: Vec<u8>,
len: usize,
}
impl CompressedValueArray {
pub fn from_values(values: &[PolymorphicValue]) -> Self {
let mut data = Vec::new();
let mut i = 0;
while i < values.len() {
let value = &values[i];
let mut run_len = 1usize;
while i + run_len < values.len() && run_len < 255 {
if Self::values_equal(&values[i + run_len], value) {
run_len += 1;
} else {
break;
}
}
if run_len > 1 {
data.push(0xFF); data.push(run_len as u8);
} else {
data.push(0xFE); }
data.extend_from_slice(&value.to_bytes());
i += run_len;
}
Self {
data,
len: values.len(),
}
}
fn values_equal(a: &PolymorphicValue, b: &PolymorphicValue) -> bool {
if a.tag() != b.tag() || a.bits != b.bits {
return false;
}
match (&a.heap, &b.heap) {
(Some(ha), Some(hb)) => ha == hb,
(None, None) => true,
_ => false,
}
}
pub fn len(&self) -> usize {
self.len
}
pub fn is_empty(&self) -> bool {
self.len == 0
}
pub fn compressed_size(&self) -> usize {
self.data.len()
}
pub fn decompress(&self) -> Vec<PolymorphicValue> {
let mut values = Vec::with_capacity(self.len);
let mut i = 0;
while i < self.data.len() {
let marker = self.data[i];
i += 1;
let (run_len, value) = if marker == 0xFF {
let run_len = self.data[i] as usize;
i += 1;
let value = PolymorphicValue::from_bytes(&self.data[i..]);
if let Some(v) = value {
i += v.encoded_size();
(run_len, v)
} else {
break;
}
} else if marker == 0xFE {
let value = PolymorphicValue::from_bytes(&self.data[i..]);
if let Some(v) = value {
i += v.encoded_size();
(1, v)
} else {
break;
}
} else {
break;
};
for _ in 0..run_len {
values.push(value.clone());
}
}
values
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_null_value() {
let v = PolymorphicValue::null();
assert!(v.is_null());
assert!(v.is_inline());
assert_eq!(v.encoded_size(), 8);
}
#[test]
fn test_bool_value() {
let t = PolymorphicValue::bool(true);
let f = PolymorphicValue::bool(false);
assert_eq!(t.as_bool(), Some(true));
assert_eq!(f.as_bool(), Some(false));
assert!(t.is_inline());
assert!(f.is_inline());
}
#[test]
fn test_int_value() {
let v1 = PolymorphicValue::int(42);
let v2 = PolymorphicValue::int(-100);
let v3 = PolymorphicValue::int(0);
let v4 = PolymorphicValue::int(i64::MAX >> 8);
assert_eq!(v1.as_int(), Some(42));
assert_eq!(v2.as_int(), Some(-100));
assert_eq!(v3.as_int(), Some(0));
assert_eq!(v4.as_int(), Some(i64::MAX >> 8));
assert!(v1.is_inline());
assert!(v2.is_inline());
}
#[test]
fn test_inline_string() {
let v = PolymorphicValue::string("hello");
assert!(v.is_inline());
assert_eq!(v.tag(), ValueTag::InlineString);
let bytes = v.inline_bytes_copy().unwrap();
assert_eq!(&bytes, b"hello");
}
#[test]
fn test_heap_string() {
let long_str = "This is a string that is longer than 7 bytes";
let v = PolymorphicValue::string(long_str);
assert!(!v.is_inline());
assert_eq!(v.tag(), ValueTag::HeapString);
assert_eq!(v.as_str(), Some(long_str.to_string()));
}
#[test]
#[allow(clippy::approx_constant)] fn test_float_value() {
let v = PolymorphicValue::float(3.14159);
assert_eq!(v.tag(), ValueTag::Float);
let f = v.as_float().unwrap();
assert!((f - 3.14159).abs() < 1e-10);
}
#[test]
fn test_serialization() {
let values = vec![
PolymorphicValue::null(),
PolymorphicValue::bool(true),
PolymorphicValue::int(42),
PolymorphicValue::string("hi"),
PolymorphicValue::string("This is a longer string"),
];
for v in values {
let bytes = v.to_bytes();
let restored = PolymorphicValue::from_bytes(&bytes).unwrap();
assert_eq!(v.tag(), restored.tag());
assert_eq!(v.bits, restored.bits);
}
}
#[test]
fn test_atomic_value() {
let atomic = AtomicPolymorphicValue::from_inline(&PolymorphicValue::int(0)).unwrap();
let old = atomic.fetch_add(5, Ordering::SeqCst);
assert_eq!(old, Some(0));
let current = atomic.try_as_int(Ordering::Acquire);
assert_eq!(current, Some(5));
}
#[test]
fn test_compressed_array() {
let values: Vec<_> = (0..100)
.map(|i| {
if i < 50 {
PolymorphicValue::int(42)
} else {
PolymorphicValue::int(i as i64)
}
})
.collect();
let compressed = CompressedValueArray::from_values(&values);
assert_eq!(compressed.len(), 100);
let uncompressed_size = 100 * 8; assert!(compressed.compressed_size() < uncompressed_size);
let restored = compressed.decompress();
assert_eq!(restored.len(), 100);
for (i, v) in restored.iter().enumerate() {
let expected = if i < 50 { 42 } else { i as i64 };
assert_eq!(v.as_int(), Some(expected));
}
}
#[test]
fn test_int_edge_cases() {
let max_inline = (1i64 << 55) - 1;
let min_inline = -(1i64 << 55);
let v1 = PolymorphicValue::int(max_inline);
let v2 = PolymorphicValue::int(min_inline);
assert_eq!(v1.as_int(), Some(max_inline));
assert_eq!(v2.as_int(), Some(min_inline));
}
}