use crate::file_header::{
write_file_header, FILE_MAGIC_STRINGTABLE_DATA, FILE_MAGIC_STRINGTABLE_INDEX,
};
use crate::serialization::Addr;
use crate::serialization::SerializationSink;
use std::{error::Error, sync::Arc};
/// Identifier of a string in the string table.
///
/// The wrapped `u64` is either a "virtual" id (any value up to and including
/// `METADATA_STRING_ID`, see `StringId::is_virtual`) or a regular id, which
/// is the address of the string in the data stream offset by
/// `FIRST_REGULAR_STRING_ID` (see `StringId::from_addr`).
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StringId(u64);
impl StringId {
pub const INVALID: StringId = StringId(INVALID_STRING_ID);
#[inline]
pub fn new(id: impl Into<u64>) -> StringId {
StringId(id.into())
}
#[inline]
pub fn new_virtual(id: impl Into<u64>) -> StringId {
let id = id.into();
assert!(id <= MAX_USER_VIRTUAL_STRING_ID);
StringId(id)
}
#[inline]
pub fn is_virtual(self) -> bool {
self.0 <= METADATA_STRING_ID
}
#[inline]
pub fn as_u64(self) -> u64 {
self.0
}
#[inline]
pub fn from_addr(addr: Addr) -> StringId {
let id = addr.0.checked_add(FIRST_REGULAR_STRING_ID).unwrap();
StringId::new(id)
}
#[inline]
pub fn to_addr(self) -> Addr {
Addr(self.0.checked_sub(FIRST_REGULAR_STRING_ID).unwrap())
}
}
// --- Byte-level encoding of string data -----------------------------------

/// Byte written after the last component of every serialized string.
/// `0xFF` never occurs in valid UTF-8, so it cannot be confused with text.
pub const TERMINATOR: u8 = 0xFF;

/// First byte of an encoded `StringComponent::Ref`.
/// `0xFE` never occurs in valid UTF-8 either.
pub const STRING_REF_TAG: u8 = 0xFE;

/// Encoded size of a string reference: one tag byte followed by the
/// referenced id as a little-endian `u64`.
pub const STRING_REF_ENCODED_SIZE: usize = 1 + std::mem::size_of::<u64>();

// --- Layout of the `StringId` value space ---------------------------------

/// Largest value accepted for user-defined virtual ids.
const MAX_USER_VIRTUAL_STRING_ID: u64 = 100_000_000;

/// Virtual id that `StringTableBuilder::alloc_metadata` maps to the
/// metadata string.
pub const METADATA_STRING_ID: u64 = 1 + MAX_USER_VIRTUAL_STRING_ID;

/// Raw value behind `StringId::INVALID`.
const INVALID_STRING_ID: u64 = 1 + METADATA_STRING_ID;

/// Offset added to a data address to form a regular id; the id of the
/// string stored at address zero.
pub const FIRST_REGULAR_STRING_ID: u64 = 1 + INVALID_STRING_ID;
/// Writer half of the string table.
///
/// String contents go to `data_sink`; entries mapping virtual ids to the
/// addresses of concrete strings go to `index_sink`.
pub struct StringTableBuilder {
    /// Receives the serialized bytes of every allocated string.
    data_sink: Arc<SerializationSink>,
    /// Receives 16-byte `(id, address)` index entries.
    index_sink: Arc<SerializationSink>,
}
/// Something that can be written into the string table's data stream.
///
/// `StringTableBuilder::alloc` reserves `serialized_size()` bytes and then
/// hands exactly that buffer to `serialize`.
pub trait SerializableString {
    /// Number of bytes `serialize` will fill.
    fn serialized_size(&self) -> usize;

    /// Writes the encoded form of `self` into `bytes`.
    fn serialize(&self, bytes: &mut [u8]);
}
/// A plain string is encoded as its UTF-8 bytes followed by `TERMINATOR`.
impl SerializableString for str {
    /// UTF-8 length of the string plus one byte for the terminator.
    #[inline]
    fn serialized_size(&self) -> usize {
        1 + self.len()
    }

    /// Fills `bytes` with the UTF-8 text followed by `TERMINATOR`.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is not exactly `self.len() + 1` bytes long.
    #[inline]
    fn serialize(&self, bytes: &mut [u8]) {
        // Everything except the final byte holds the text itself.
        let text_len = bytes.len() - 1;
        let (text_dest, terminator_dest) = bytes.split_at_mut(text_len);
        text_dest.copy_from_slice(self.as_bytes());
        terminator_dest[0] = TERMINATOR;
    }
}
/// One piece of a composite string: either literal text or a reference to
/// another string in the table.
pub enum StringComponent<'s> {
    /// Literal text, copied verbatim into the output.
    Value(&'s str),
    /// Reference to another string, encoded as `STRING_REF_TAG` followed by
    /// the little-endian id.
    Ref(StringId),
}
impl<'s> StringComponent<'s> {
    /// Number of bytes this component occupies once encoded (no terminator).
    #[inline]
    fn serialized_size(&self) -> usize {
        match self {
            StringComponent::Value(text) => text.len(),
            StringComponent::Ref(_) => STRING_REF_ENCODED_SIZE,
        }
    }

    /// Encodes the component at the start of `bytes` and returns the unused
    /// tail of the buffer, so that components can be written back to back.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is shorter than `self.serialized_size()`.
    #[inline]
    fn serialize<'b>(&self, bytes: &'b mut [u8]) -> &'b mut [u8] {
        match *self {
            StringComponent::Value(text) => {
                let (dest, rest) = bytes.split_at_mut(text.len());
                dest.copy_from_slice(text.as_bytes());
                rest
            }
            StringComponent::Ref(string_id) => {
                // The layout below is one tag byte plus an 8-byte id.
                assert!(STRING_REF_ENCODED_SIZE == 9);
                let id_bytes = string_id.0.to_le_bytes();
                bytes[0] = STRING_REF_TAG;
                bytes[1..STRING_REF_ENCODED_SIZE].copy_from_slice(&id_bytes);
                &mut bytes[STRING_REF_ENCODED_SIZE..]
            }
        }
    }
}
/// A slice of components is encoded as the concatenation of the encoded
/// components followed by a single `TERMINATOR`.
impl<'a> SerializableString for [StringComponent<'a>] {
    /// Sum of the component sizes plus one byte for the terminator.
    #[inline]
    fn serialized_size(&self) -> usize {
        let components_size: usize = self
            .iter()
            .map(|component| component.serialized_size())
            .sum();
        components_size + 1
    }

    /// Writes all components back to back, then the terminator.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is not exactly `self.serialized_size()` bytes long.
    #[inline]
    fn serialize(&self, bytes: &mut [u8]) {
        assert!(bytes.len() == self.serialized_size());

        // Each component consumes the front of the buffer and hands back
        // whatever is left for the next one.
        let remaining = self
            .iter()
            .fold(bytes, |buffer, component| component.serialize(buffer));

        assert!(remaining.len() == 1);
        remaining[0] = TERMINATOR;
    }
}
macro_rules! impl_serializable_string_for_fixed_size {
($n:expr) => {
impl<'a> SerializableString for [StringComponent<'a>; $n] {
#[inline(always)]
fn serialized_size(&self) -> usize {
(&self[..]).serialized_size()
}
#[inline(always)]
fn serialize(&self, bytes: &mut [u8]) {
(&self[..]).serialize(bytes);
}
}
};
}
impl_serializable_string_for_fixed_size!(0);
impl_serializable_string_for_fixed_size!(1);
impl_serializable_string_for_fixed_size!(2);
impl_serializable_string_for_fixed_size!(3);
impl_serializable_string_for_fixed_size!(4);
impl_serializable_string_for_fixed_size!(5);
impl_serializable_string_for_fixed_size!(6);
impl_serializable_string_for_fixed_size!(7);
impl_serializable_string_for_fixed_size!(8);
impl_serializable_string_for_fixed_size!(9);
impl_serializable_string_for_fixed_size!(10);
impl_serializable_string_for_fixed_size!(11);
impl_serializable_string_for_fixed_size!(12);
impl_serializable_string_for_fixed_size!(13);
impl_serializable_string_for_fixed_size!(14);
impl_serializable_string_for_fixed_size!(15);
impl_serializable_string_for_fixed_size!(16);
/// Writes one index entry to `sink`: the id followed by the address, each
/// as an 8-byte little-endian integer (16 bytes in total).
fn serialize_index_entry(sink: &SerializationSink, id: StringId, addr: Addr) {
    const ID_SIZE: usize = 8;
    const ENTRY_SIZE: usize = 16;

    let id_bytes = id.0.to_le_bytes();
    let addr_bytes = addr.0.to_le_bytes();

    sink.write_atomic(ENTRY_SIZE, |entry| {
        entry[..ID_SIZE].copy_from_slice(&id_bytes);
        entry[ID_SIZE..ENTRY_SIZE].copy_from_slice(&addr_bytes);
    });
}
impl StringTableBuilder {
    /// Creates a builder that writes string data to `data_sink` and index
    /// entries to `index_sink`.
    ///
    /// The respective file header is written to each sink first.
    ///
    /// # Errors
    ///
    /// Returns the error reported by `write_file_header` if writing either
    /// header fails.
    pub fn new(
        data_sink: Arc<SerializationSink>,
        index_sink: Arc<SerializationSink>,
    ) -> Result<StringTableBuilder, Box<dyn Error + Send + Sync>> {
        write_file_header(&mut data_sink.as_std_write(), FILE_MAGIC_STRINGTABLE_DATA)?;
        write_file_header(&mut index_sink.as_std_write(), FILE_MAGIC_STRINGTABLE_INDEX)?;

        Ok(StringTableBuilder {
            data_sink,
            index_sink,
        })
    }

    /// Records in the index that `virtual_id` refers to the string
    /// identified by `concrete_id`.
    ///
    /// # Panics
    ///
    /// Panics if `virtual_id` is greater than `MAX_USER_VIRTUAL_STRING_ID`,
    /// or if `concrete_id` is not a regular id (see `StringId::to_addr`).
    pub fn map_virtual_to_concrete_string(&self, virtual_id: StringId, concrete_id: StringId) {
        assert!(virtual_id.0 <= MAX_USER_VIRTUAL_STRING_ID);
        serialize_index_entry(&*self.index_sink, virtual_id, concrete_id.to_addr());
    }

    /// Maps every id in `virtual_ids` to the same `concrete_id`, emitting
    /// all index entries in a single atomic write.
    ///
    /// Each entry has the same 16-byte layout that `serialize_index_entry`
    /// produces: the virtual id followed by the concrete address, both as
    /// little-endian `u64`.
    ///
    /// # Panics
    ///
    /// Panics if any virtual id is greater than
    /// `MAX_USER_VIRTUAL_STRING_ID`, or if `concrete_id` is not a regular id
    /// (see `StringId::to_addr`). Nothing is written in either case.
    pub fn bulk_map_virtual_to_single_concrete_string<I>(
        &self,
        virtual_ids: I,
        concrete_id: StringId,
    ) where
        I: Iterator<Item = StringId> + ExactSizeIterator,
    {
        // Size of one `(id, address)` index entry in bytes.
        const ENTRY_SIZE: usize = 16;

        // The target address is identical for every entry, so encode it once.
        let addr_bytes = concrete_id.to_addr().0.to_le_bytes();

        // Build the byte buffer directly instead of reinterpreting a
        // `Vec<[u64; 2]>` through a raw pointer; the resulting bytes are the
        // same and no `unsafe` is needed.
        let mut serialized: Vec<u8> = Vec::with_capacity(virtual_ids.len() * ENTRY_SIZE);
        for virtual_id in virtual_ids {
            let id = virtual_id.0;
            assert!(id <= MAX_USER_VIRTUAL_STRING_ID);
            serialized.extend_from_slice(&id.to_le_bytes());
            serialized.extend_from_slice(&addr_bytes);
        }

        self.index_sink.write_bytes_atomic(&serialized);
    }

    /// Allocates `s` in the data stream and maps the reserved
    /// `METADATA_STRING_ID` to it in the index.
    pub fn alloc_metadata<STR: SerializableString + ?Sized>(&self, s: &STR) {
        let concrete_id = self.alloc(s);
        let virtual_id = StringId(METADATA_STRING_ID);
        assert!(virtual_id.is_virtual());
        serialize_index_entry(&*self.index_sink, virtual_id, concrete_id.to_addr());
    }

    /// Serializes `s` into the data stream and returns the regular id
    /// derived from the address it was written at.
    pub fn alloc<STR: SerializableString + ?Sized>(&self, s: &STR) -> StringId {
        let size_in_bytes = s.serialized_size();
        let addr = self.data_sink.write_atomic(size_in_bytes, |mem| {
            s.serialize(mem);
        });

        StringId::from_addr(addr)
    }
}