use core::num::NonZeroU32;
use std::sync::OnceLock;
use oxc_allocator::{Allocator, Vec as ArenaVec};
use rustc_hash::FxHashMap;
use crate::format::string_field::StringField;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StringIndex(NonZeroU32);
impl StringIndex {
#[inline]
#[must_use]
pub const fn from_u32(value: u32) -> Option<Self> {
match NonZeroU32::new(value.wrapping_add(1)) {
Some(nz) => Some(StringIndex(nz)),
None => None,
}
}
#[inline]
#[must_use]
pub const fn as_u32(self) -> u32 {
self.0.get() - 1
}
}
#[derive(Debug, Clone, Copy)]
pub enum LeafStringPayload {
Inline {
offset: u32,
length: u8,
},
Index(StringIndex),
}
pub const COMMON_STRING_COUNT: u32 = COMMON_STRINGS.len() as u32;
pub const COMMON_EMPTY: u32 = 0;
pub const COMMON_SPACE: u32 = 1;
pub const COMMON_STAR: u32 = 2;
pub const COMMON_SLASH_STAR: u32 = 3;
pub const COMMON_LF: u32 = 4;
pub const COMMON_TAB: u32 = 5;
pub const COMMON_CRLF: u32 = 6;
pub const COMMON_SLASH_STAR_STAR: u32 = 7;
const COMMON_STRINGS: &[&str] = &[
"",
" ",
"*",
"*/",
"\n",
"\t",
"\r\n",
"/**",
"param",
"returns",
"return",
"throws",
"type",
"see",
"example",
"deprecated",
"since",
"default",
"author",
"internal",
"private",
"public",
"protected",
"static",
"this",
"override",
"readonly",
"yields",
];
#[inline]
pub(crate) fn lookup_common(value: &str) -> Option<u32> {
match value.len() {
0 => Some(0), 1 => match value.as_bytes()[0] {
b' ' => Some(1),
b'*' => Some(2),
b'\n' => Some(4),
b'\t' => Some(5),
_ => None,
},
2 => match value {
"*/" => Some(3),
"\r\n" => Some(6),
_ => None,
},
3 => match value {
"/**" => Some(7),
"see" => Some(13),
_ => None,
},
4 => match value {
"type" => Some(12),
"this" => Some(24),
_ => None,
},
5 => match value {
"param" => Some(8),
"since" => Some(16),
_ => None,
},
6 => match value {
"return" => Some(10),
"throws" => Some(11),
"author" => Some(18),
"public" => Some(21),
"static" => Some(23),
"yields" => Some(27),
_ => None,
},
7 => match value {
"returns" => Some(9),
"example" => Some(14),
"default" => Some(17),
"private" => Some(20),
_ => None,
},
8 => match value {
"internal" => Some(19),
"readonly" => Some(26),
"override" => Some(25),
_ => None,
},
9 => match value {
"protected" => Some(22),
_ => None,
},
10 => match value {
"deprecated" => Some(15),
_ => None,
},
_ => None,
}
}
fn prelude_offsets() -> &'static [u8] {
static CACHE: OnceLock<Vec<u8>> = OnceLock::new();
CACHE.get_or_init(|| {
let mut buf = Vec::with_capacity(COMMON_STRINGS.len() * 8);
let mut pos = 0u32;
for s in COMMON_STRINGS {
let start = pos;
pos += s.len() as u32;
let end = pos;
buf.extend_from_slice(&start.to_le_bytes());
buf.extend_from_slice(&end.to_le_bytes());
}
buf
})
}
fn prelude_data() -> &'static [u8] {
static CACHE: OnceLock<Vec<u8>> = OnceLock::new();
CACHE.get_or_init(|| {
let mut buf = Vec::new();
for s in COMMON_STRINGS {
buf.extend_from_slice(s.as_bytes());
}
buf
})
}
fn common_string_fields() -> &'static [StringField] {
static CACHE: OnceLock<Vec<StringField>> = OnceLock::new();
CACHE.get_or_init(|| {
let mut fields = Vec::with_capacity(COMMON_STRINGS.len());
let mut pos = 0u32;
for s in COMMON_STRINGS {
let length = u16::try_from(s.len()).expect("common string longer than u16");
fields.push(StringField {
offset: pos,
length,
});
pos += s.len() as u32;
}
fields
})
}
#[inline]
pub fn common_string_field(idx: u32) -> StringField {
common_string_fields()[idx as usize]
}
#[derive(Debug, Clone, Copy)]
struct DedupEntry {
field: StringField,
index: Option<StringIndex>,
}
pub struct StringTableBuilder<'arena> {
pub(crate) offsets_buffer: ArenaVec<'arena, u8>,
pub(crate) data_buffer: ArenaVec<'arena, u8>,
pub(crate) count: u32,
arena: &'arena Allocator,
dedup: FxHashMap<&'arena str, DedupEntry>,
last_key_ptr: *const u8,
last_key_len: usize,
last_entry: DedupEntry,
}
impl<'arena> StringTableBuilder<'arena> {
#[must_use]
pub fn new(arena: &'arena Allocator) -> Self {
let mut offsets_buffer = ArenaVec::new_in(arena);
offsets_buffer.extend_from_slice(prelude_offsets());
let mut data_buffer = ArenaVec::new_in(arena);
data_buffer.extend_from_slice(prelude_data());
StringTableBuilder {
offsets_buffer,
data_buffer,
count: COMMON_STRING_COUNT,
arena,
dedup: FxHashMap::default(),
last_key_ptr: core::ptr::null(),
last_key_len: 0,
last_entry: DedupEntry {
field: StringField::NONE,
index: None,
},
}
}
pub fn intern(&mut self, value: &str) -> StringField {
if value.as_ptr() == self.last_key_ptr && value.len() == self.last_key_len {
return self.last_entry.field;
}
if let Some(idx) = lookup_common(value) {
let field = common_string_field(idx);
self.update_recent_cache(value, DedupEntry { field, index: None });
return field;
}
if let Some(entry) = self.dedup.get(value) {
let snapshot = *entry;
self.update_recent_cache(value, snapshot);
return snapshot.field;
}
let field = self.append_no_dedup(value);
let key: &'arena str = self.arena.alloc_str(value);
let entry = DedupEntry { field, index: None };
self.dedup.insert(key, entry);
self.update_recent_cache(value, entry);
field
}
pub fn intern_for_leaf(&mut self, value: &str) -> StringIndex {
if value.as_ptr() == self.last_key_ptr && value.len() == self.last_key_len {
if let Some(idx) = self.last_entry.index {
return idx;
}
}
if let Some(idx) = lookup_common(value) {
return StringIndex::from_u32(idx).expect("common index in range");
}
if let Some(entry) = self.dedup.get(value) {
if let Some(idx) = entry.index {
let snapshot = *entry;
self.update_recent_cache(value, snapshot);
return idx;
}
let field = entry.field;
let idx = self.assign_index(field);
let entry = DedupEntry {
field,
index: Some(idx),
};
if let Some(entry_mut) = self.dedup.get_mut(value) {
*entry_mut = entry;
}
self.update_recent_cache(value, entry);
return idx;
}
let field = self.append_no_dedup(value);
let idx = self.assign_index(field);
let key: &'arena str = self.arena.alloc_str(value);
let entry = DedupEntry {
field,
index: Some(idx),
};
self.dedup.insert(key, entry);
self.update_recent_cache(value, entry);
idx
}
pub fn intern_unique(&mut self, value: &str) -> StringField {
self.append_no_dedup(value)
}
#[inline]
pub fn intern_at_offset(&mut self, start: u32, end: u32) -> StringField {
debug_assert!(
(end as usize) <= self.data_buffer.len(),
"intern_at_offset range [{start}, {end}) extends past data_buffer length {}",
self.data_buffer.len()
);
debug_assert!(start <= end, "intern_at_offset start > end");
debug_assert!(
(end - start) <= u16::MAX as u32,
"source slice length {} exceeds u16 (StringField max)",
end - start
);
StringField {
offset: start,
length: (end - start) as u16,
}
}
pub fn intern_at_offset_for_leaf(&mut self, start: u32, end: u32) -> StringIndex {
debug_assert!(
(end as usize) <= self.data_buffer.len(),
"intern_at_offset_for_leaf range [{start}, {end}) extends past data_buffer length {}",
self.data_buffer.len()
);
debug_assert!(start <= end, "intern_at_offset_for_leaf start > end");
push_offset_pair(&mut self.offsets_buffer, start, end);
let idx = StringIndex::from_u32(self.count).expect("string index overflow");
self.count = self.count.checked_add(1).expect("string table overflow");
idx
}
pub fn append_source_text(&mut self, value: &str) -> u32 {
let offset = self.data_buffer.len() as u32;
self.data_buffer.extend_from_slice(value.as_bytes());
offset
}
pub(crate) fn reset(&mut self) {
let prelude_offsets_bytes = prelude_offsets();
let prelude_data_bytes = prelude_data();
self.offsets_buffer.truncate(0);
self.offsets_buffer.extend_from_slice(prelude_offsets_bytes);
self.data_buffer.truncate(0);
self.data_buffer.extend_from_slice(prelude_data_bytes);
self.count = COMMON_STRING_COUNT;
self.dedup.clear();
self.last_key_ptr = core::ptr::null();
self.last_key_len = 0;
self.last_entry = DedupEntry {
field: StringField::NONE,
index: None,
};
}
#[inline]
#[must_use]
pub const fn len(&self) -> u32 {
self.count
}
#[inline]
#[must_use]
pub const fn is_empty(&self) -> bool {
self.count == 0
}
#[inline]
fn update_recent_cache(&mut self, value: &str, entry: DedupEntry) {
self.last_key_ptr = value.as_ptr();
self.last_key_len = value.len();
self.last_entry = entry;
}
#[inline]
fn append_no_dedup(&mut self, value: &str) -> StringField {
debug_assert!(
value.len() <= u16::MAX as usize,
"string length {} exceeds u16 (StringField max)",
value.len()
);
let offset = self.data_buffer.len() as u32;
self.data_buffer.extend_from_slice(value.as_bytes());
let length = value.len() as u16;
StringField { offset, length }
}
fn assign_index(&mut self, field: StringField) -> StringIndex {
let idx = StringIndex::from_u32(self.count).expect("string index overflow");
self.count = self.count.checked_add(1).expect("string table overflow");
let end = field.offset + field.length as u32;
push_offset_pair(&mut self.offsets_buffer, field.offset, end);
idx
}
}
#[inline]
fn push_offset_pair(buf: &mut ArenaVec<'_, u8>, start: u32, end: u32) {
let mut bytes = [0u8; 8];
bytes[..4].copy_from_slice(&start.to_le_bytes());
bytes[4..].copy_from_slice(&end.to_le_bytes());
buf.extend_from_slice(&bytes);
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn string_index_round_trips() {
for raw in [0u32, 1, 100, 0xFFFE, 0x3FFF_FFFE] {
let idx = StringIndex::from_u32(raw).unwrap();
assert_eq!(idx.as_u32(), raw);
}
}
#[test]
fn string_index_rejects_overflow() {
assert!(StringIndex::from_u32(u32::MAX).is_none());
}
#[test]
fn intern_dedups_repeated_values_as_string_field() {
let arena = Allocator::default();
let mut builder = StringTableBuilder::new(&arena);
let a = builder.intern("custom_alpha");
let b = builder.intern("custom_beta");
let a_again = builder.intern("custom_alpha");
assert_eq!(a, a_again);
assert_ne!(a, b);
}
#[test]
fn intern_for_leaf_is_lazy_about_offsets_entry() {
let arena = Allocator::default();
let mut builder = StringTableBuilder::new(&arena);
let prelude_count = COMMON_STRING_COUNT;
let _f = builder.intern("custom_xyz");
assert_eq!(
builder.len(),
prelude_count,
"no offsets entry for ED-only intern"
);
let _idx = builder.intern_for_leaf("custom_xyz");
assert_eq!(
builder.len(),
prelude_count + 1,
"leaf intern allocates offsets entry"
);
let _idx2 = builder.intern_for_leaf("custom_xyz");
assert_eq!(
builder.len(),
prelude_count + 1,
"dedup on second leaf intern"
);
}
#[test]
fn append_source_text_does_not_register_an_index() {
let arena = Allocator::default();
let mut builder = StringTableBuilder::new(&arena);
let common_data_bytes = builder.data_buffer.len() as u32;
let off = builder.append_source_text("/** @x */");
assert_eq!(
off, common_data_bytes,
"source text starts immediately after the common-string prelude"
);
assert_eq!(
builder.len(),
COMMON_STRING_COUNT,
"source text does not count as an offsets entry"
);
}
#[test]
fn intern_common_string_returns_predetermined_field() {
let arena = Allocator::default();
let mut builder = StringTableBuilder::new(&arena);
let field = builder.intern("param");
let expected = common_string_fields()[8];
assert_eq!(field, expected);
}
#[test]
fn intern_for_leaf_common_string_returns_predetermined_index() {
let arena = Allocator::default();
let mut builder = StringTableBuilder::new(&arena);
assert_eq!(builder.intern_for_leaf("param").as_u32(), 8);
let pre_count = builder.len();
assert_eq!(builder.intern_for_leaf("param").as_u32(), 8);
assert_eq!(builder.len(), pre_count, "fast path must not append");
}
}