pub mod atomic_ptr;
pub mod bucket_char;
pub mod node16_char;
pub mod node48_char;
pub mod node4_char;
pub mod persistent_node;
pub use atomic_ptr::AtomicNodePtr;
pub use bucket_char::CharBucket;
pub use node16_char::CharNode16;
pub use node48_char::CharNode48;
pub use node4_char::CharNode4;
pub use persistent_node::PersistentCharNode;
use crate::persistent_artrie::swizzled_ptr::SwizzledPtr;
pub const CHAR_MAX_PREFIX_LEN: usize = 6;
pub mod flags {
pub const IS_FINAL: u8 = 0b0000_0001;
pub const IS_DIRTY: u8 = 0b0000_0010;
pub const IS_LEAF: u8 = 0b0000_0100;
}
#[repr(C)]
#[derive(Debug, Clone)]
pub struct CharNodeHeader {
pub node_type: u8,
pub prefix_len: u8,
pub flags: u8,
pub _padding: u8,
pub num_children: u16,
pub _padding2: [u8; 2],
pub version: u64,
}
impl CharNodeHeader {
pub fn new(node_type: u8) -> Self {
Self {
node_type,
prefix_len: 0,
flags: 0,
_padding: 0,
num_children: 0,
_padding2: [0; 2],
version: 0,
}
}
#[inline]
pub fn is_final(&self) -> bool {
self.flags & flags::IS_FINAL != 0
}
#[inline]
pub fn set_final(&mut self, final_state: bool) {
if final_state {
self.flags |= flags::IS_FINAL;
} else {
self.flags &= !flags::IS_FINAL;
}
}
#[inline]
pub fn is_dirty(&self) -> bool {
self.flags & flags::IS_DIRTY != 0
}
#[inline]
pub fn set_dirty(&mut self, dirty: bool) {
if dirty {
self.flags |= flags::IS_DIRTY;
} else {
self.flags &= !flags::IS_DIRTY;
}
}
#[inline]
pub fn is_leaf(&self) -> bool {
self.flags & flags::IS_LEAF != 0
}
#[inline]
pub fn set_leaf(&mut self, leaf: bool) {
if leaf {
self.flags |= flags::IS_LEAF;
} else {
self.flags &= !flags::IS_LEAF;
}
}
#[inline]
pub fn increment_version(&mut self) {
self.version = self.version.wrapping_add(1);
}
}
impl Default for CharNodeHeader {
fn default() -> Self {
Self::new(0)
}
}
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct CharCompressedPrefix {
pub chars: [u32; CHAR_MAX_PREFIX_LEN],
}
impl CharCompressedPrefix {
pub const fn empty() -> Self {
Self {
chars: [0; CHAR_MAX_PREFIX_LEN],
}
}
pub fn from_chars(chars: &[u32]) -> Self {
assert!(
chars.len() <= CHAR_MAX_PREFIX_LEN,
"prefix too long: {} > {}",
chars.len(),
CHAR_MAX_PREFIX_LEN
);
let mut prefix = Self::empty();
prefix.chars[..chars.len()].copy_from_slice(chars);
prefix
}
pub fn from_char_iter<I: IntoIterator<Item = char>>(chars: I) -> Self {
let mut prefix = Self::empty();
for (i, c) in chars.into_iter().take(CHAR_MAX_PREFIX_LEN).enumerate() {
prefix.chars[i] = c as u32;
}
prefix
}
pub fn match_key(&self, key: &[u32], prefix_len: usize) -> usize {
let check_len = prefix_len.min(key.len()).min(CHAR_MAX_PREFIX_LEN);
for i in 0..check_len {
if self.chars[i] != key[i] {
return i;
}
}
check_len
}
pub fn as_slice(&self, len: usize) -> &[u32] {
&self.chars[..len.min(CHAR_MAX_PREFIX_LEN)]
}
pub fn to_chars(&self, len: usize) -> Vec<char> {
self.chars[..len.min(CHAR_MAX_PREFIX_LEN)]
.iter()
.filter_map(|&c| char::from_u32(c))
.collect()
}
}
impl Default for CharCompressedPrefix {
fn default() -> Self {
Self::empty()
}
}
#[derive(Debug, Clone)]
pub enum CharNode {
N4(Box<CharNode4>),
N16(Box<CharNode16>),
N48(Box<CharNode48>),
Bucket(Box<CharBucket>),
}
impl CharNode {
pub fn header(&self) -> &CharNodeHeader {
match self {
CharNode::N4(n) => &n.header,
CharNode::N16(n) => &n.header,
CharNode::N48(n) => &n.header,
CharNode::Bucket(n) => &n.header,
}
}
pub fn header_mut(&mut self) -> &mut CharNodeHeader {
match self {
CharNode::N4(n) => &mut n.header,
CharNode::N16(n) => &mut n.header,
CharNode::N48(n) => &mut n.header,
CharNode::Bucket(n) => &mut n.header,
}
}
pub fn prefix(&self) -> &CharCompressedPrefix {
match self {
CharNode::N4(n) => &n.prefix,
CharNode::N16(n) => &n.prefix,
CharNode::N48(n) => &n.prefix,
CharNode::Bucket(n) => &n.prefix,
}
}
pub fn prefix_mut(&mut self) -> &mut CharCompressedPrefix {
match self {
CharNode::N4(n) => &mut n.prefix,
CharNode::N16(n) => &mut n.prefix,
CharNode::N48(n) => &mut n.prefix,
CharNode::Bucket(n) => &mut n.prefix,
}
}
#[inline]
pub fn is_final(&self) -> bool {
self.header().is_final()
}
pub fn find_child(&self, key: u32) -> Option<&SwizzledPtr> {
match self {
CharNode::N4(n) => n.find_child(key),
CharNode::N16(n) => n.find_child(key),
CharNode::N48(n) => n.find_child(key),
CharNode::Bucket(n) => n.find_child(key),
}
}
pub fn find_child_mut(&mut self, key: u32) -> Option<&mut SwizzledPtr> {
match self {
CharNode::N4(n) => n.find_child_mut(key),
CharNode::N16(n) => n.find_child_mut(key),
CharNode::N48(n) => n.find_child_mut(key),
CharNode::Bucket(n) => n.find_child_mut(key),
}
}
pub fn iter_children(&self) -> Box<dyn Iterator<Item = (u32, &SwizzledPtr)> + '_> {
match self {
CharNode::N4(n) => Box::new(n.iter_children()),
CharNode::N16(n) => Box::new(n.iter_children()),
CharNode::N48(n) => Box::new(n.iter_children()),
CharNode::Bucket(n) => Box::new(n.iter_children()),
}
}
#[inline]
pub fn num_children(&self) -> usize {
self.header().num_children as usize
}
pub fn is_full(&self) -> bool {
match self {
CharNode::N4(n) => n.is_full(),
CharNode::N16(n) => n.is_full(),
CharNode::N48(n) => n.is_full(),
CharNode::Bucket(_) => false, }
}
pub fn should_grow(&self) -> bool {
self.is_full()
}
pub fn should_shrink(&self) -> bool {
match self {
CharNode::N4(_) => false,
CharNode::N16(n) => n.header.num_children <= 4,
CharNode::N48(n) => n.header.num_children <= 16,
CharNode::Bucket(n) => n.header.num_children <= 48,
}
}
pub fn new_node4() -> Self {
CharNode::N4(Box::new(CharNode4::new()))
}
pub fn new_node16() -> Self {
CharNode::N16(Box::new(CharNode16::new()))
}
pub fn new_node48() -> Self {
CharNode::N48(Box::new(CharNode48::new()))
}
pub fn new_bucket() -> Self {
CharNode::Bucket(Box::new(CharBucket::new()))
}
pub fn grow(&self) -> Option<CharNode> {
match self {
CharNode::N4(n) => Some(CharNode::N16(Box::new(n.grow()))),
CharNode::N16(n) => Some(CharNode::N48(Box::new(n.grow()))),
CharNode::N48(n) => Some(CharNode::Bucket(Box::new(n.grow()))),
CharNode::Bucket(_) => None, }
}
pub fn shrink(&self) -> Option<CharNode> {
match self {
CharNode::N4(_) => None,
CharNode::N16(n) => Some(CharNode::N4(Box::new(n.shrink()))),
CharNode::N48(n) => Some(CharNode::N16(Box::new(n.shrink()))),
CharNode::Bucket(n) => Some(CharNode::N48(Box::new(n.shrink()))),
}
}
pub fn add_child_growing(
&mut self,
key: u32,
child: SwizzledPtr,
) -> Result<Option<CharNode>, AddChildError> {
match self {
CharNode::N4(n) => {
if n.is_full() {
let mut grown = n.grow();
grown.add_child(key, child)?;
Ok(Some(CharNode::N16(Box::new(grown))))
} else {
n.add_child(key, child)?;
Ok(None)
}
}
CharNode::N16(n) => {
if n.is_full() {
let mut grown = n.grow();
grown.add_child(key, child)?;
Ok(Some(CharNode::N48(Box::new(grown))))
} else {
n.add_child(key, child)?;
Ok(None)
}
}
CharNode::N48(n) => {
if n.is_full() {
let mut grown = n.grow();
grown.add_child(key, child)?;
Ok(Some(CharNode::Bucket(Box::new(grown))))
} else {
n.add_child(key, child)?;
Ok(None)
}
}
CharNode::Bucket(n) => {
n.add_child(key, child)?;
Ok(None)
}
}
}
pub fn remove_child_shrinking(&mut self, key: u32) -> Option<(SwizzledPtr, Option<CharNode>)> {
match self {
CharNode::N4(n) => n.remove_child(key).map(|removed| (removed, None)),
CharNode::N16(n) => {
if let Some(removed) = n.remove_child(key) {
if n.header.num_children <= 4 {
Some((removed, Some(CharNode::N4(Box::new(n.shrink())))))
} else {
Some((removed, None))
}
} else {
None
}
}
CharNode::N48(n) => {
if let Some(removed) = n.remove_child(key) {
if n.header.num_children <= 16 {
Some((removed, Some(CharNode::N16(Box::new(n.shrink())))))
} else {
Some((removed, None))
}
} else {
None
}
}
CharNode::Bucket(n) => {
if let Some(removed) = n.remove_child(key) {
if n.header.num_children <= 48 {
Some((removed, Some(CharNode::N48(Box::new(n.shrink())))))
} else {
Some((removed, None))
}
} else {
None
}
}
}
}
}
pub trait CharArtNode {
fn find_child(&self, key: u32) -> Option<&SwizzledPtr>;
fn find_child_mut(&mut self, key: u32) -> Option<&mut SwizzledPtr>;
fn add_child(&mut self, key: u32, child: SwizzledPtr) -> Result<(), AddChildError>;
fn remove_child(&mut self, key: u32) -> Option<SwizzledPtr>;
fn is_full(&self) -> bool;
fn iter_children(&self) -> impl Iterator<Item = (u32, &SwizzledPtr)>;
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AddChildError {
NodeFull,
KeyExists,
}
impl std::fmt::Display for AddChildError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
AddChildError::NodeFull => write!(f, "node is full"),
AddChildError::KeyExists => write!(f, "key already exists"),
}
}
}
impl std::error::Error for AddChildError {}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChildStorage {
Direct,
Sequential {
arena_id: u32,
first_slot: u32,
},
}
impl ChildStorage {
#[inline]
pub fn is_direct(&self) -> bool {
matches!(self, ChildStorage::Direct)
}
#[inline]
pub fn is_sequential(&self) -> bool {
matches!(self, ChildStorage::Sequential { .. })
}
pub fn first_slot(&self) -> Option<u32> {
match self {
ChildStorage::Sequential { first_slot, .. } => Some(*first_slot),
ChildStorage::Direct => None,
}
}
pub fn arena_id(&self) -> Option<u32> {
match self {
ChildStorage::Sequential { arena_id, .. } => Some(*arena_id),
ChildStorage::Direct => None,
}
}
pub fn sequential(arena_id: u32, first_slot: u32) -> Self {
ChildStorage::Sequential {
arena_id,
first_slot,
}
}
}
impl Default for ChildStorage {
fn default() -> Self {
ChildStorage::Direct
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_char_node_header_flags() {
let mut header = CharNodeHeader::new(4);
assert!(!header.is_final());
assert!(!header.is_dirty());
assert!(!header.is_leaf());
header.set_final(true);
assert!(header.is_final());
header.set_dirty(true);
assert!(header.is_dirty());
header.set_leaf(true);
assert!(header.is_leaf());
header.set_final(false);
assert!(!header.is_final());
assert!(header.is_dirty()); }
#[test]
fn test_char_node_header_version() {
let mut header = CharNodeHeader::new(4);
assert_eq!(header.version, 0);
header.increment_version();
assert_eq!(header.version, 1);
header.increment_version();
assert_eq!(header.version, 2);
}
#[test]
fn test_char_compressed_prefix() {
let chars: Vec<u32> = "hello".chars().map(|c| c as u32).collect();
let prefix = CharCompressedPrefix::from_chars(&chars);
assert_eq!(prefix.as_slice(5), &chars[..]);
assert_eq!(prefix.as_slice(3), &chars[..3]);
}
#[test]
fn test_char_prefix_from_rust_chars() {
let prefix = CharCompressedPrefix::from_char_iter("hello".chars());
assert_eq!(prefix.to_chars(5), vec!['h', 'e', 'l', 'l', 'o']);
}
#[test]
fn test_char_prefix_unicode() {
let prefix = CharCompressedPrefix::from_char_iter("日本🎉".chars());
assert_eq!(prefix.to_chars(3), vec!['日', '本', '🎉']);
}
#[test]
fn test_char_prefix_matching() {
let chars: Vec<u32> = "hello".chars().map(|c| c as u32).collect();
let prefix = CharCompressedPrefix::from_chars(&chars);
let key: Vec<u32> = "hello world".chars().map(|c| c as u32).collect();
assert_eq!(prefix.match_key(&key, 5), 5);
let key: Vec<u32> = "help".chars().map(|c| c as u32).collect();
assert_eq!(prefix.match_key(&key, 5), 3);
let key: Vec<u32> = "world".chars().map(|c| c as u32).collect();
assert_eq!(prefix.match_key(&key, 5), 0);
}
#[test]
#[should_panic(expected = "prefix too long")]
fn test_char_prefix_too_long() {
let chars: Vec<u32> = "1234567".chars().map(|c| c as u32).collect();
CharCompressedPrefix::from_chars(&chars);
}
#[test]
fn test_child_storage_direct() {
let storage = ChildStorage::Direct;
assert!(storage.is_direct());
assert!(!storage.is_sequential());
assert_eq!(storage.first_slot(), None);
assert_eq!(storage.arena_id(), None);
}
#[test]
fn test_child_storage_sequential() {
let storage = ChildStorage::sequential(5, 100);
assert!(!storage.is_direct());
assert!(storage.is_sequential());
assert_eq!(storage.arena_id(), Some(5));
assert_eq!(storage.first_slot(), Some(100));
}
#[test]
fn test_child_storage_default() {
let storage = ChildStorage::default();
assert!(storage.is_direct());
}
#[test]
fn test_child_storage_equality() {
let s1 = ChildStorage::sequential(1, 10);
let s2 = ChildStorage::sequential(1, 10);
let s3 = ChildStorage::sequential(1, 20);
let s4 = ChildStorage::Direct;
assert_eq!(s1, s2);
assert_ne!(s1, s3);
assert_ne!(s1, s4);
assert_eq!(s4, ChildStorage::Direct);
}
}