Skip to main content

jinterner/
lib.rs

1//! An efficient and concurrent interning library for JSON values.
2
3#![forbid(missing_docs, unsafe_code)]
4#![cfg_attr(docsrs, feature(doc_cfg))]
5
6#[cfg(feature = "delta")]
7mod delta;
8mod detail;
9
10use blazinterner::{Arena, ArenaSlice, Interned, InternedSlice};
11#[cfg(feature = "delta")]
12pub use delta::DeltaEncoding;
13use detail::InternedStrKey;
14pub use detail::mapping::Mapping;
15use detail::mapping::{MappingNoStrings, MappingStrings, RevMappingImpl};
16pub use detail::{IValue, ValueRef};
17#[cfg(feature = "get-size2")]
18use get_size2::GetSize;
19#[cfg(feature = "serde")]
20use serde_tuple::{Deserialize_tuple, Serialize_tuple};
21use std::cmp::Ordering;
22
23/// An arena to store interned JSON values.
24#[derive(Default, Debug, PartialEq, Eq)]
25#[cfg_attr(feature = "serde", derive(Serialize_tuple, Deserialize_tuple))]
26#[cfg_attr(feature = "get-size2", derive(GetSize))]
27pub struct Jinterners {
28    string: Arena<str, Box<str>>,
29    iarray: ArenaSlice<IValue>,
30    iobject: ArenaSlice<(InternedStrKey, IValue)>,
31}
32
#[cfg(feature = "get-size2")]
impl Jinterners {
    /// Reports the size in bytes of the arena holding the interned strings.
    pub fn get_size_strings(&self) -> usize {
        let Self { string, .. } = self;
        string.get_size()
    }

    /// Reports the size in bytes of the arena holding the interned arrays.
    pub fn get_size_arrays(&self) -> usize {
        let Self { iarray, .. } = self;
        iarray.get_size()
    }

    /// Reports the size in bytes of the arena holding the interned objects.
    pub fn get_size_objects(&self) -> usize {
        let Self { iobject, .. } = self;
        iobject.get_size()
    }
}
50
#[cfg(feature = "debug")]
impl Jinterners {
    /// Prints to stdout a summary of the storage used by the string arena.
    ///
    /// `prefix`, `title` and `total_bytes` are forwarded unchanged to the
    /// arena's own `print_summary`.
    pub fn print_summary_strings(&self, prefix: &str, title: &str, total_bytes: usize) {
        let Self { string, .. } = self;
        string.print_summary(prefix, title, total_bytes);
    }

    /// Prints to stdout a summary of the storage used by the array arena.
    ///
    /// `prefix`, `title` and `total_bytes` are forwarded unchanged to the
    /// arena's own `print_summary`.
    pub fn print_summary_arrays(&self, prefix: &str, title: &str, total_bytes: usize) {
        let Self { iarray, .. } = self;
        iarray.print_summary(prefix, title, total_bytes);
    }

    /// Prints to stdout a summary of the storage used by the object arena.
    ///
    /// `prefix`, `title` and `total_bytes` are forwarded unchanged to the
    /// arena's own `print_summary`.
    pub fn print_summary_objects(&self, prefix: &str, title: &str, total_bytes: usize) {
        let Self { iobject, .. } = self;
        iobject.print_summary(prefix, title, total_bytes);
    }
}
71
72impl Jinterners {
73    /// Returns an optimized version of this [`Jinterners`], or [`None`] if the
74    /// iteration `limit` is set to zero.
75    ///
76    /// [`IValue`]s rooted in this [`Jinterners`] need to be converted using the
77    /// resulting [`Mapping`] to be used in the destination [`Jinterners`].
78    pub fn optimize(&self, limit: Option<usize>) -> Option<(Jinterners, Mapping)> {
79        if limit == Some(0) {
80            return None;
81        }
82
83        let mut optimized = self.optimize_once_strings().map(|(jinterners, mapping)| {
84            let mapping = mapping.promote(
85                jinterners.iarray.slices() as u32,
86                jinterners.iobject.slices() as u32,
87            );
88            (jinterners, mapping)
89        });
90
91        let mut i = 0;
92        loop {
93            if limit == Some(i) {
94                break;
95            }
96
97            let jinterners = match optimized {
98                None => self,
99                Some((ref jinterners, _)) => jinterners,
100            };
101            let (jinterners, mapping) = match jinterners.optimize_once_no_strings() {
102                None => break,
103                Some((iarray, iobject, mapping_opt)) => match optimized {
104                    None => {
105                        let num_strings = self.string.len() as u32;
106                        let mut string = Arena::with_capacity(self.string.len());
107                        for i in 0..num_strings {
108                            string.push_mut(Interned::from_id(i).lookup_ref(&self.string).into());
109                        }
110
111                        (
112                            Jinterners {
113                                string,
114                                iarray,
115                                iobject,
116                            },
117                            mapping_opt.promote(num_strings),
118                        )
119                    }
120                    Some((mut jinterners, mapping)) => {
121                        jinterners.iarray = iarray;
122                        jinterners.iobject = iobject;
123                        (jinterners, mapping.compose(mapping_opt))
124                    }
125                },
126            };
127            optimized = Some((jinterners, mapping));
128
129            i = i.wrapping_add(1);
130        }
131        optimized
132    }
133
134    /// Returns a partially optimized version of this [`Jinterners`], or
135    /// [`None`] if this instance was already optimized.
136    ///
137    /// This only runs one iteration of the optimization routine, so you may
138    /// want to use [`optimize()`](Self::optimize) instead.
139    ///
140    /// [`IValue`]s rooted in this [`Jinterners`] need to be converted using the
141    /// resulting [`Mapping`] to be used in the destination [`Jinterners`].
142    pub fn optimize_once(&self) -> Option<(Jinterners, Mapping)> {
143        let string_rev = self.optimized_mapping_strings();
144        let iarray_rev = self.optimized_mapping_arrays();
145        let iobject_rev = self.optimized_mapping_objects();
146
147        let mapping = Mapping {
148            string: string_rev.reverse(),
149            iarray: iarray_rev.reverse(),
150            iobject: iobject_rev.reverse(),
151        };
152        if mapping.is_identity() {
153            return None;
154        }
155
156        let mut jinterners = Jinterners {
157            string: Arena::with_capacity(self.string.len()),
158            iarray: ArenaSlice::with_capacity(self.iarray.slices(), self.iarray.items()),
159            iobject: ArenaSlice::with_capacity(self.iobject.slices(), self.iobject.items()),
160        };
161
162        for i in string_rev.iter() {
163            jinterners
164                .string
165                .push_mut(Interned::from_id(i).lookup_ref(&self.string).into());
166        }
167        for i in iarray_rev.iter() {
168            let array = InternedSlice::from_id(i).lookup(&self.iarray);
169            let array: Box<[_]> = array.iter().map(|ivalue| mapping.map(*ivalue)).collect();
170            jinterners.iarray.push_mut(&array);
171        }
172        for i in iobject_rev.iter() {
173            let object = InternedSlice::from_id(i).lookup(&self.iobject);
174            let object: Box<[_]> = object
175                .iter()
176                .map(|(k, ivalue)| (mapping.map_str_key(*k), mapping.map(*ivalue)))
177                .collect();
178            jinterners.iobject.push_mut(&object);
179        }
180
181        Some((jinterners, mapping))
182    }
183
184    fn optimize_once_strings(&self) -> Option<(Jinterners, MappingStrings)> {
185        let string_rev = self.optimized_mapping_strings();
186        let mapping = MappingStrings {
187            string: string_rev.reverse(),
188        };
189
190        if mapping.is_identity() {
191            return None;
192        }
193
194        let mut jinterners = Jinterners {
195            string: Arena::with_capacity(self.string.len()),
196            iarray: ArenaSlice::with_capacity(self.iarray.slices(), self.iarray.items()),
197            iobject: ArenaSlice::with_capacity(self.iobject.slices(), self.iobject.items()),
198        };
199
200        for i in string_rev.iter() {
201            jinterners
202                .string
203                .push_mut(Interned::from_id(i).lookup_ref(&self.string).into());
204        }
205        for i in 0..self.iarray.slices() as u32 {
206            let array = InternedSlice::from_id(i).lookup(&self.iarray);
207            let array: Box<[_]> = array.iter().map(|ivalue| mapping.map(*ivalue)).collect();
208            jinterners.iarray.push_mut(&array);
209        }
210        for i in 0..self.iobject.slices() as u32 {
211            let object = InternedSlice::from_id(i).lookup(&self.iobject);
212            let object: Box<[_]> = object
213                .iter()
214                .map(|(k, ivalue)| (mapping.map_str_key(*k), mapping.map(*ivalue)))
215                .collect();
216            jinterners.iobject.push_mut(&object);
217        }
218
219        Some((jinterners, mapping))
220    }
221
222    #[expect(clippy::type_complexity)]
223    fn optimize_once_no_strings(
224        &self,
225    ) -> Option<(
226        ArenaSlice<IValue>,
227        ArenaSlice<(InternedStrKey, IValue)>,
228        MappingNoStrings,
229    )> {
230        let iarray_rev = self.optimized_mapping_arrays();
231        let iobject_rev = self.optimized_mapping_objects();
232
233        let mapping = MappingNoStrings {
234            iarray: iarray_rev.reverse(),
235            iobject: iobject_rev.reverse(),
236        };
237        if mapping.is_identity() {
238            return None;
239        }
240
241        let mut iarray = ArenaSlice::with_capacity(self.iarray.slices(), self.iarray.items());
242        for i in iarray_rev.iter() {
243            let array = InternedSlice::from_id(i).lookup(&self.iarray);
244            let array: Box<[_]> = array.iter().map(|ivalue| mapping.map(*ivalue)).collect();
245            iarray.push_mut(&array);
246        }
247
248        let mut iobject = ArenaSlice::with_capacity(self.iobject.slices(), self.iobject.items());
249        for i in iobject_rev.iter() {
250            let object = InternedSlice::from_id(i).lookup(&self.iobject);
251            let object: Box<[_]> = object
252                .iter()
253                .map(|(k, ivalue)| (*k, mapping.map(*ivalue)))
254                .collect();
255            iobject.push_mut(&object);
256        }
257
258        Some((iarray, iobject, mapping))
259    }
260
261    fn optimized_mapping_strings(&self) -> RevMappingImpl {
262        let mut mapping: Vec<u32> = (0..self.string.len() as u32).collect();
263        mapping
264            .sort_by_cached_key(|i| CustomStrOrd(Interned::from_id(*i).lookup_ref(&self.string)));
265        RevMappingImpl(mapping.into_boxed_slice())
266    }
267
268    fn optimized_mapping_arrays(&self) -> RevMappingImpl {
269        let mut mapping: Vec<u32> = (0..self.iarray.slices() as u32).collect();
270        mapping.sort_by_cached_key(|i| {
271            CustomSliceOrd(InternedSlice::from_id(*i).lookup(&self.iarray))
272        });
273        RevMappingImpl(mapping.into_boxed_slice())
274    }
275
276    fn optimized_mapping_objects(&self) -> RevMappingImpl {
277        let mut mapping: Vec<u32> = (0..self.iobject.slices() as u32).collect();
278        mapping.sort_by_cached_key(|i| {
279            CustomSliceOrd(InternedSlice::from_id(*i).lookup(&self.iobject))
280        });
281        RevMappingImpl(mapping.into_boxed_slice())
282    }
283}
284
/// Sort key that orders strings by length first; strings of equal length fall
/// back to the standard `str` comparison.
#[derive(Eq, PartialEq)]
struct CustomStrOrd<'a>(&'a str);

impl PartialOrd for CustomStrOrd<'_> {
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        Some(self.cmp(rhs))
    }
}

impl Ord for CustomStrOrd<'_> {
    fn cmp(&self, rhs: &Self) -> Ordering {
        let (left, right) = (self.0, rhs.0);
        match left.len().cmp(&right.len()) {
            // Same length: the contents decide.
            Ordering::Equal => left.cmp(right),
            by_length => by_length,
        }
    }
}
302
/// Sort key that orders slices by length first; slices of equal length fall
/// back to the standard slice comparison.
#[derive(Eq, PartialEq)]
struct CustomSliceOrd<'a, T>(&'a [T]);

impl<T: Ord> PartialOrd for CustomSliceOrd<'_, T> {
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        Some(self.cmp(rhs))
    }
}

impl<T: Ord> Ord for CustomSliceOrd<'_, T> {
    fn cmp(&self, rhs: &Self) -> Ordering {
        let (left, right) = (self.0, rhs.0);
        match left.len().cmp(&right.len()) {
            // Same length: the contents decide.
            Ordering::Equal => left.cmp(right),
            by_length => by_length,
        }
    }
}
319}