#![forbid(
missing_docs,
unsafe_op_in_unsafe_fn,
clippy::missing_safety_doc,
clippy::multiple_unsafe_ops_per_block,
clippy::undocumented_unsafe_blocks
)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#[cfg(feature = "delta")]
mod delta;
mod detail;
use blazinterner::{ArenaSlice, ArenaStr, InternedSlice, InternedStr};
#[cfg(feature = "delta")]
pub use delta::DeltaEncoding;
pub use detail::mapping::Mapping;
use detail::mapping::{MappingNoStrings, MappingStrings, RevMappingImpl};
pub use detail::{IValue, InternedStrKey, MapRef, ValueRef};
#[cfg(feature = "get-size2")]
use get_size2::GetSize;
use serde_json::Value;
#[cfg(feature = "serde")]
use serde_tuple::{Deserialize_tuple, Serialize_tuple};
use std::cmp::Ordering;
/// Arenas holding the interned strings, arrays and objects of JSON values.
///
/// Values are added through [`Jinterners::intern`] and
/// [`Jinterners::intern_ref`].
///
/// NOTE(review): with the `serde` feature this type derives
/// `Serialize_tuple`/`Deserialize_tuple`, so the order of the fields below is
/// presumably part of the serialized format — confirm before reordering them.
#[derive(Default, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize_tuple, Deserialize_tuple))]
#[cfg_attr(feature = "get-size2", derive(GetSize))]
pub struct Jinterners {
/// Arena of interned strings.
string: ArenaStr,
/// Arena of interned arrays; each entry is a slice of [`IValue`]s.
iarray: ArenaSlice<IValue>,
/// Arena of interned objects; each entry is a slice of `(key, value)` pairs.
iobject: ArenaSlice<(InternedStrKey, IValue)>,
}
// Size accessors, only available with the `get-size2` feature.
#[cfg(feature = "get-size2")]
impl Jinterners {
/// Returns the result of `get_size()` on the string arena.
///
/// NOTE(review): presumably a byte count as defined by
/// `get_size2::GetSize` — confirm.
pub fn get_size_strings(&self) -> usize {
self.string.get_size()
}
/// Returns the result of `get_size()` on the array arena.
///
/// NOTE(review): presumably a byte count as defined by
/// `get_size2::GetSize` — confirm.
pub fn get_size_arrays(&self) -> usize {
self.iarray.get_size()
}
/// Returns the result of `get_size()` on the object arena.
///
/// NOTE(review): presumably a byte count as defined by
/// `get_size2::GetSize` — confirm.
pub fn get_size_objects(&self) -> usize {
self.iobject.get_size()
}
}
// Diagnostic helpers, only available with the `debug` feature.
#[cfg(feature = "debug")]
impl Jinterners {
/// Forwards `prefix`, `title` and `total_bytes` to the string arena's
/// `print_summary`.
///
/// The meaning of the arguments and the output format are defined by the
/// arena's `print_summary`, which is not visible in this file.
pub fn print_summary_strings(&self, prefix: &str, title: &str, total_bytes: usize) {
self.string.print_summary(prefix, title, total_bytes);
}
/// Forwards `prefix`, `title` and `total_bytes` to the array arena's
/// `print_summary`.
///
/// The meaning of the arguments and the output format are defined by the
/// arena's `print_summary`, which is not visible in this file.
pub fn print_summary_arrays(&self, prefix: &str, title: &str, total_bytes: usize) {
self.iarray.print_summary(prefix, title, total_bytes);
}
/// Forwards `prefix`, `title` and `total_bytes` to the object arena's
/// `print_summary`.
///
/// The meaning of the arguments and the output format are defined by the
/// arena's `print_summary`, which is not visible in this file.
pub fn print_summary_objects(&self, prefix: &str, title: &str, total_bytes: usize) {
self.iobject.print_summary(prefix, title, total_bytes);
}
}
impl Jinterners {
/// Interns an owned JSON `source` and returns the resulting [`IValue`].
///
/// Delegates to `IValue::from`, passing these interners.
pub fn intern(&self, source: Value) -> IValue {
IValue::from(self, source)
}
/// Interns a borrowed JSON `source` and returns the resulting [`IValue`].
///
/// Delegates to `IValue::from_ref`, passing these interners.
pub fn intern_ref(&self, source: &Value) -> IValue {
IValue::from_ref(self, source)
}
pub fn find_key(&self, key: &str) -> Option<InternedStrKey> {
self.string.find(key).map(InternedStrKey)
}
/// Rebuilds the arenas in sorted order and returns them together with the
/// [`Mapping`] accumulated over all passes.
///
/// The strings are reordered by a single `optimize_once_strings` pass. The
/// arrays and objects are then reordered by `optimize_once_no_strings`
/// passes, repeated until a pass reports no change or `limit` passes have
/// run. `limit == None` places no cap on the number of array/object passes.
///
/// Returns `None` when `limit` is `Some(0)` or when neither the string pass
/// nor the first array/object pass changed anything.
pub fn optimize(&self, limit: Option<usize>) -> Option<(Jinterners, Mapping)> {
// A limit of zero means no work at all, not even the string pass.
if limit == Some(0) {
return None;
}
// Single string pass. Its string-only mapping is promoted to a full
// `Mapping` using the array and object counts of the rebuilt interners.
let mut optimized = self.optimize_once_strings().map(|(jinterners, mapping)| {
let mapping = mapping.promote(
jinterners.iarray.slices() as u32,
jinterners.iobject.slices() as u32,
);
(jinterners, mapping)
});
// Number of array/object passes applied so far.
let mut i = 0;
loop {
if limit == Some(i) {
break;
}
// Work on the most recent result, or on `self` if nothing changed yet.
let jinterners = match optimized {
None => self,
Some((ref jinterners, _)) => jinterners,
};
let (jinterners, mapping) = match jinterners.optimize_once_no_strings() {
// The pass produced an identity mapping: nothing left to reorder.
None => break,
Some((iarray, iobject, mapping_opt)) => match optimized {
None => {
// First change of any kind: the strings were left as they are, so
// copy them in id order into a fresh arena for the new interners.
let num_strings = self.string.strings() as u32;
let mut string =
ArenaStr::with_capacity(self.string.strings(), self.string.bytes());
for i in 0..num_strings {
string.push_mut(self.string.lookup(InternedStr::from_id(i)));
}
(
Jinterners {
string,
iarray,
iobject,
},
mapping_opt.promote(num_strings),
)
}
Some((mut jinterners, mapping)) => {
// Keep the strings of the previous result, swap in the reordered
// arrays/objects and chain this pass onto the accumulated mapping.
jinterners.iarray = iarray;
jinterners.iobject = iobject;
(jinterners, mapping.compose(mapping_opt))
}
},
};
optimized = Some((jinterners, mapping));
i = i.wrapping_add(1);
}
optimized
}
/// Runs one reordering pass over the string, array and object arenas.
///
/// The new order of each arena comes from `optimized_mapping_strings`,
/// `optimized_mapping_arrays` and `optimized_mapping_objects`. Every stored
/// value and object key is rewritten with the resulting [`Mapping`] while it
/// is copied into the new arenas.
///
/// Returns `None` when the mapping is the identity, otherwise the rebuilt
/// interners and the mapping that was applied.
pub fn optimize_once(&self) -> Option<(Jinterners, Mapping)> {
// Each `*_rev` lists the ids of `self` in the order they get in the result.
let string_rev = self.optimized_mapping_strings();
let iarray_rev = self.optimized_mapping_arrays();
let iobject_rev = self.optimized_mapping_objects();
let mapping = Mapping {
string: string_rev.reverse(),
iarray: iarray_rev.reverse(),
iobject: iobject_rev.reverse(),
};
if mapping.is_identity() {
return None;
}
// The new arenas hold the same entries, so size them like the current ones.
let mut jinterners = Jinterners {
string: ArenaStr::with_capacity(self.string.strings(), self.string.bytes()),
iarray: ArenaSlice::with_capacity(self.iarray.slices(), self.iarray.items()),
iobject: ArenaSlice::with_capacity(self.iobject.slices(), self.iobject.items()),
};
// Copy the strings over in their new order.
for i in string_rev.iter() {
jinterners
.string
.push_mut(self.string.lookup(InternedStr::from_id(i)));
}
// Copy the arrays over in their new order, remapping every element.
for i in iarray_rev.iter() {
let array = self.iarray.lookup(InternedSlice::from_id(i));
let iter = array.iter().map(|ivalue| mapping.map(*ivalue));
// SAFETY: NOTE(review): the contract of `push_iter_mut` is not visible in
// this file. Presumably it relies on the iterator reporting its exact
// length, which a `map` over a slice iterator does — confirm.
unsafe { jinterners.iarray.push_iter_mut(iter) };
}
// Scratch buffer reused for every object.
let mut buffer = Vec::new();
// Copy the objects over in their new order, remapping keys and values.
for i in iobject_rev.iter() {
let object = self.iobject.lookup(InternedSlice::from_id(i));
buffer.extend(
object
.iter()
.map(|(k, ivalue)| (mapping.map_str_key(*k), mapping.map(*ivalue))),
);
// Remapped keys may compare differently than before, so order the entries
// by key again (presumably objects are expected to be key-sorted — confirm).
buffer.sort_unstable_by_key(|(k, _)| *k);
jinterners.iobject.push_copy_mut(&buffer);
buffer.clear();
}
Some((jinterners, mapping))
}
/// Runs one reordering pass over the string arena only.
///
/// Arrays and objects keep their ids: they are copied in id order, with
/// their contents rewritten through the string mapping.
///
/// Returns `None` when the string mapping is the identity, otherwise the
/// rebuilt interners and the string-only mapping.
fn optimize_once_strings(&self) -> Option<(Jinterners, MappingStrings)> {
// Lists the string ids of `self` in the order they get in the result.
let string_rev = self.optimized_mapping_strings();
let mapping = MappingStrings {
string: string_rev.reverse(),
};
if mapping.is_identity() {
return None;
}
// The new arenas hold the same entries, so size them like the current ones.
let mut jinterners = Jinterners {
string: ArenaStr::with_capacity(self.string.strings(), self.string.bytes()),
iarray: ArenaSlice::with_capacity(self.iarray.slices(), self.iarray.items()),
iobject: ArenaSlice::with_capacity(self.iobject.slices(), self.iobject.items()),
};
// Copy the strings over in their new order.
for i in string_rev.iter() {
jinterners
.string
.push_mut(self.string.lookup(InternedStr::from_id(i)));
}
// Arrays are visited in id order, so their ids are unchanged.
for i in 0..self.iarray.slices() as u32 {
let array = self.iarray.lookup(InternedSlice::from_id(i));
let iter = array.iter().map(|ivalue| mapping.map(*ivalue));
// SAFETY: NOTE(review): the contract of `push_iter_mut` is not visible in
// this file. Presumably it relies on the iterator reporting its exact
// length, which a `map` over a slice iterator does — confirm.
unsafe { jinterners.iarray.push_iter_mut(iter) };
}
// Scratch buffer reused for every object.
let mut buffer = Vec::new();
// Objects are visited in id order, so their ids are unchanged.
for i in 0..self.iobject.slices() as u32 {
let object = self.iobject.lookup(InternedSlice::from_id(i));
buffer.extend(
object
.iter()
.map(|(k, ivalue)| (mapping.map_str_key(*k), mapping.map(*ivalue))),
);
// Remapped keys may compare differently than before, so order the entries
// by key again (presumably objects are expected to be key-sorted — confirm).
buffer.sort_unstable_by_key(|(k, _)| *k);
jinterners.iobject.push_copy_mut(&buffer);
buffer.clear();
}
Some((jinterners, mapping))
}
/// Runs one reordering pass over the array and object arenas only.
///
/// The string arena is not touched and object keys are copied unchanged.
///
/// Returns `None` when the array/object mapping is the identity, otherwise
/// the rebuilt array arena, the rebuilt object arena and the mapping.
#[expect(clippy::type_complexity)]
fn optimize_once_no_strings(
&self,
) -> Option<(
ArenaSlice<IValue>,
ArenaSlice<(InternedStrKey, IValue)>,
MappingNoStrings,
)> {
// Each `*_rev` lists the ids of `self` in the order they get in the result.
let iarray_rev = self.optimized_mapping_arrays();
let iobject_rev = self.optimized_mapping_objects();
let mapping = MappingNoStrings {
iarray: iarray_rev.reverse(),
iobject: iobject_rev.reverse(),
};
if mapping.is_identity() {
return None;
}
// Copy the arrays over in their new order, remapping every element.
let mut iarray = ArenaSlice::with_capacity(self.iarray.slices(), self.iarray.items());
for i in iarray_rev.iter() {
let array = self.iarray.lookup(InternedSlice::from_id(i));
let iter = array.iter().map(|ivalue| mapping.map(*ivalue));
// SAFETY: NOTE(review): the contract of `push_iter_mut` is not visible in
// this file. Presumably it relies on the iterator reporting its exact
// length, which a `map` over a slice iterator does — confirm.
unsafe { iarray.push_iter_mut(iter) };
}
// Copy the objects over in their new order. Keys are copied as they are, so
// the entries keep their existing order and are not sorted again here.
let mut iobject = ArenaSlice::with_capacity(self.iobject.slices(), self.iobject.items());
for i in iobject_rev.iter() {
let object = self.iobject.lookup(InternedSlice::from_id(i));
let iter = object.iter().map(|(k, ivalue)| (*k, mapping.map(*ivalue)));
// SAFETY: NOTE(review): the contract of `push_iter_mut` is not visible in
// this file. Presumably it relies on the iterator reporting its exact
// length, which a `map` over a slice iterator does — confirm.
unsafe { iobject.push_iter_mut(iter) };
}
Some((iarray, iobject, mapping))
}
/// Lists every string id, ordered by the [`CustomStrOrd`] of the string it
/// names (shorter strings first, then by content).
fn optimized_mapping_strings(&self) -> RevMappingImpl {
    let count = self.string.strings() as u32;
    let mut order = (0..count).collect::<Vec<u32>>();
    // The key borrows the string from the arena; it is computed once per id.
    order.sort_by_cached_key(|&id| CustomStrOrd(self.string.lookup(InternedStr::from_id(id))));
    RevMappingImpl(order.into_boxed_slice())
}
/// Lists every array id, ordered by the [`CustomSliceOrd`] of the array it
/// names (shorter arrays first, then element by element).
fn optimized_mapping_arrays(&self) -> RevMappingImpl {
    let count = self.iarray.slices() as u32;
    let mut order = (0..count).collect::<Vec<u32>>();
    // The key borrows the slice from the arena; it is computed once per id.
    order.sort_by_cached_key(|&id| CustomSliceOrd(self.iarray.lookup(InternedSlice::from_id(id))));
    RevMappingImpl(order.into_boxed_slice())
}
/// Lists every object id, ordered by the [`CustomSliceOrd`] of the object it
/// names (fewer entries first, then entry by entry).
fn optimized_mapping_objects(&self) -> RevMappingImpl {
    let count = self.iobject.slices() as u32;
    let mut order = (0..count).collect::<Vec<u32>>();
    // The key borrows the slice from the arena; it is computed once per id.
    order.sort_by_cached_key(|&id| CustomSliceOrd(self.iobject.lookup(InternedSlice::from_id(id))));
    RevMappingImpl(order.into_boxed_slice())
}
}
/// Sort key that orders strings by byte length first and by content second.
///
/// Equality is the derived one (plain string equality), which agrees with
/// the ordering: two keys compare `Equal` only when the strings are equal.
#[derive(PartialEq, Eq)]
struct CustomStrOrd<'a>(&'a str);

impl Ord for CustomStrOrd<'_> {
    fn cmp(&self, other: &Self) -> Ordering {
        let (lhs, rhs) = (self.0, other.0);
        match lhs.len().cmp(&rhs.len()) {
            // Same length: fall back to the regular string ordering.
            Ordering::Equal => lhs.cmp(rhs),
            by_length => by_length,
        }
    }
}

impl PartialOrd for CustomStrOrd<'_> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
/// Sort key that orders slices by length first and by elements second.
///
/// Equality is the derived one (plain slice equality), which agrees with the
/// ordering as long as `T`'s own `Eq` and `Ord` agree.
#[derive(PartialEq, Eq)]
struct CustomSliceOrd<'a, T>(&'a [T]);

impl<T> Ord for CustomSliceOrd<'_, T>
where
    T: Ord,
{
    fn cmp(&self, other: &Self) -> Ordering {
        let (lhs, rhs) = (self.0, other.0);
        match lhs.len().cmp(&rhs.len()) {
            // Same length: fall back to the regular lexicographic slice ordering.
            Ordering::Equal => lhs.cmp(rhs),
            by_length => by_length,
        }
    }
}

impl<T> PartialOrd for CustomSliceOrd<'_, T>
where
    T: Ord,
{
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}