loro_internal/
arena.rs

1mod str_arena;
2
3use std::{
4    num::NonZeroU16,
5    ops::{Range, RangeBounds},
6    sync::Arc,
7};
8
9use crate::sync::{Mutex, MutexGuard};
10use append_only_bytes::BytesSlice;
11use fxhash::FxHashMap;
12use loro_common::PeerID;
13
14use crate::{
15    change::Lamport,
16    container::{
17        idx::ContainerIdx,
18        list::list_op::{InnerListOp, ListOp},
19        map::MapSet,
20        ContainerID,
21    },
22    id::Counter,
23    op::{InnerContent, ListSlice, Op, RawOp, RawOpContent, SliceRange},
24    LoroValue,
25};
26
27use self::str_arena::StrArena;
28
29#[derive(Default, Debug)]
30struct InnerSharedArena {
31    // The locks should not be exposed outside this file.
32    // It might be better to use RwLock in the future
33    container_idx_to_id: Mutex<Vec<ContainerID>>,
34    // 0 stands for unknown, 1 stands for root containers
35    depth: Mutex<Vec<Option<NonZeroU16>>>,
36    container_id_to_idx: Mutex<FxHashMap<ContainerID, ContainerIdx>>,
37    /// The parent of each container.
38    parents: Mutex<FxHashMap<ContainerIdx, Option<ContainerIdx>>>,
39    values: Mutex<Vec<LoroValue>>,
40    root_c_idx: Mutex<Vec<ContainerIdx>>,
41    str: Arc<Mutex<StrArena>>,
42}
43
44/// This is shared between [OpLog] and [AppState].
45///
46#[derive(Debug, Clone)]
47pub struct SharedArena {
48    inner: Arc<InnerSharedArena>,
49}
50
51#[derive(Debug)]
52pub struct StrAllocResult {
53    /// unicode start
54    pub start: usize,
55    /// unicode end
56    pub end: usize,
57}
58
59pub(crate) struct ArenaGuards<'a> {
60    container_id_to_idx: MutexGuard<'a, FxHashMap<ContainerID, ContainerIdx>>,
61    container_idx_to_id: MutexGuard<'a, Vec<ContainerID>>,
62    depth: MutexGuard<'a, Vec<Option<NonZeroU16>>>,
63    parents: MutexGuard<'a, FxHashMap<ContainerIdx, Option<ContainerIdx>>>,
64    root_c_idx: MutexGuard<'a, Vec<ContainerIdx>>,
65}
66
67impl ArenaGuards<'_> {
68    pub fn register_container(&mut self, id: &ContainerID) -> ContainerIdx {
69        if let Some(&idx) = self.container_id_to_idx.get(id) {
70            return idx;
71        }
72
73        let idx = self.container_idx_to_id.len();
74        self.container_idx_to_id.push(id.clone());
75        let idx = ContainerIdx::from_index_and_type(idx as u32, id.container_type());
76        self.container_id_to_idx.insert(id.clone(), idx);
77        if id.is_root() {
78            self.root_c_idx.push(idx);
79            self.parents.insert(idx, None);
80            self.depth.push(NonZeroU16::new(1));
81        } else {
82            self.depth.push(None);
83        }
84        idx
85    }
86
87    pub fn set_parent(&mut self, child: ContainerIdx, parent: Option<ContainerIdx>) {
88        self.parents.insert(child, parent);
89
90        match parent {
91            Some(p) => {
92                if let Some(d) = get_depth(p, &mut self.depth, &self.parents) {
93                    self.depth[child.to_index() as usize] = NonZeroU16::new(d.get() + 1);
94                } else {
95                    self.depth[child.to_index() as usize] = None;
96                }
97            }
98            None => {
99                self.depth[child.to_index() as usize] = NonZeroU16::new(1);
100            }
101        }
102    }
103}
104
105impl SharedArena {
106    #[allow(clippy::new_without_default)]
107    pub fn new() -> Self {
108        Self {
109            inner: Arc::new(InnerSharedArena::default()),
110        }
111    }
112
113    pub fn fork(&self) -> Self {
114        Self {
115            inner: Arc::new(InnerSharedArena {
116                container_idx_to_id: Mutex::new(
117                    self.inner.container_idx_to_id.lock().unwrap().clone(),
118                ),
119                depth: Mutex::new(self.inner.depth.lock().unwrap().clone()),
120                container_id_to_idx: Mutex::new(
121                    self.inner.container_id_to_idx.lock().unwrap().clone(),
122                ),
123                parents: Mutex::new(self.inner.parents.lock().unwrap().clone()),
124                values: Mutex::new(self.inner.values.lock().unwrap().clone()),
125                root_c_idx: Mutex::new(self.inner.root_c_idx.lock().unwrap().clone()),
126                str: self.inner.str.clone(),
127            }),
128        }
129    }
130
131    pub(crate) fn with_guards(&self, f: impl FnOnce(&mut ArenaGuards)) {
132        let mut guards = self.get_arena_guards();
133        f(&mut guards);
134    }
135
136    fn get_arena_guards(&self) -> ArenaGuards {
137        ArenaGuards {
138            container_id_to_idx: self.inner.container_id_to_idx.lock().unwrap(),
139            container_idx_to_id: self.inner.container_idx_to_id.lock().unwrap(),
140            depth: self.inner.depth.lock().unwrap(),
141            parents: self.inner.parents.lock().unwrap(),
142            root_c_idx: self.inner.root_c_idx.lock().unwrap(),
143        }
144    }
145
146    pub fn register_container(&self, id: &ContainerID) -> ContainerIdx {
147        let mut container_id_to_idx = self.inner.container_id_to_idx.lock().unwrap();
148        if let Some(&idx) = container_id_to_idx.get(id) {
149            return idx;
150        }
151
152        let mut container_idx_to_id = self.inner.container_idx_to_id.lock().unwrap();
153        let idx = container_idx_to_id.len();
154        container_idx_to_id.push(id.clone());
155        let idx = ContainerIdx::from_index_and_type(idx as u32, id.container_type());
156        container_id_to_idx.insert(id.clone(), idx);
157        if id.is_root() {
158            self.inner.root_c_idx.lock().unwrap().push(idx);
159            self.inner.parents.lock().unwrap().insert(idx, None);
160            self.inner.depth.lock().unwrap().push(NonZeroU16::new(1));
161        } else {
162            self.inner.depth.lock().unwrap().push(None);
163        }
164        idx
165    }
166
167    pub fn get_container_id(&self, idx: ContainerIdx) -> Option<ContainerID> {
168        let lock = self.inner.container_idx_to_id.lock().unwrap();
169        lock.get(idx.to_index() as usize).cloned()
170    }
171
172    pub fn id_to_idx(&self, id: &ContainerID) -> Option<ContainerIdx> {
173        self.inner
174            .container_id_to_idx
175            .lock()
176            .unwrap()
177            .get(id)
178            .copied()
179    }
180
181    #[inline]
182    pub fn idx_to_id(&self, id: ContainerIdx) -> Option<ContainerID> {
183        let lock = self.inner.container_idx_to_id.lock().unwrap();
184        lock.get(id.to_index() as usize).cloned()
185    }
186
187    #[inline]
188    pub fn with_idx_to_id<R>(&self, f: impl FnOnce(&Vec<ContainerID>) -> R) -> R {
189        let lock = self.inner.container_idx_to_id.lock().unwrap();
190        f(&lock)
191    }
192
193    pub fn alloc_str(&self, str: &str) -> StrAllocResult {
194        let mut text_lock = self.inner.str.lock().unwrap();
195        _alloc_str(&mut text_lock, str)
196    }
197
198    /// return slice and unicode index
199    pub fn alloc_str_with_slice(&self, str: &str) -> (BytesSlice, StrAllocResult) {
200        let mut text_lock = self.inner.str.lock().unwrap();
201        _alloc_str_with_slice(&mut text_lock, str)
202    }
203
204    /// alloc str without extra info
205    pub fn alloc_str_fast(&self, bytes: &[u8]) {
206        let mut text_lock = self.inner.str.lock().unwrap();
207        text_lock.alloc(std::str::from_utf8(bytes).unwrap());
208    }
209
210    #[inline]
211    pub fn utf16_len(&self) -> usize {
212        self.inner.str.lock().unwrap().len_utf16()
213    }
214
215    #[inline]
216    pub fn alloc_value(&self, value: LoroValue) -> usize {
217        let mut values_lock = self.inner.values.lock().unwrap();
218        _alloc_value(&mut values_lock, value)
219    }
220
221    #[inline]
222    pub fn alloc_values(&self, values: impl Iterator<Item = LoroValue>) -> std::ops::Range<usize> {
223        let mut values_lock = self.inner.values.lock().unwrap();
224        _alloc_values(&mut values_lock, values)
225    }
226
227    #[inline]
228    pub fn set_parent(&self, child: ContainerIdx, parent: Option<ContainerIdx>) {
229        let parents = &mut self.inner.parents.lock().unwrap();
230        parents.insert(child, parent);
231        let mut depth = self.inner.depth.lock().unwrap();
232
233        match parent {
234            Some(p) => {
235                if let Some(d) = get_depth(p, &mut depth, parents) {
236                    depth[child.to_index() as usize] = NonZeroU16::new(d.get() + 1);
237                } else {
238                    depth[child.to_index() as usize] = None;
239                }
240            }
241            None => {
242                depth[child.to_index() as usize] = NonZeroU16::new(1);
243            }
244        }
245    }
246
247    pub fn log_hierarchy(&self) {
248        if cfg!(debug_assertions) {
249            for (c, p) in self.inner.parents.lock().unwrap().iter() {
250                tracing::info!(
251                    "container {:?} {:?} {:?}",
252                    c,
253                    self.get_container_id(*c),
254                    p.map(|x| self.get_container_id(x))
255                );
256            }
257        }
258    }
259
260    pub fn log_all_containers(&self) {
261        self.inner
262            .container_id_to_idx
263            .lock()
264            .unwrap()
265            .iter()
266            .for_each(|(id, idx)| {
267                tracing::info!("container {:?} {:?}", id, idx);
268            });
269        self.inner
270            .container_idx_to_id
271            .lock()
272            .unwrap()
273            .iter()
274            .enumerate()
275            .for_each(|(i, id)| {
276                tracing::info!("container {} {:?}", i, id);
277            });
278    }
279
280    pub fn get_parent(&self, child: ContainerIdx) -> Option<ContainerIdx> {
281        if self.get_container_id(child).unwrap().is_root() {
282            // TODO: PERF: we can speed this up by use a special bit in ContainerIdx to indicate
283            // whether the target is a root container
284            return None;
285        }
286
287        self.inner
288            .parents
289            .lock()
290            .unwrap()
291            .get(&child)
292            .copied()
293            .expect("InternalError: Parent is not registered")
294    }
295
296    /// Call `f` on each ancestor of `container`, including `container` itself.
297    ///
298    /// f(ContainerIdx, is_first)
299    pub fn with_ancestors(&self, container: ContainerIdx, mut f: impl FnMut(ContainerIdx, bool)) {
300        let mut container = Some(container);
301        let mut is_first = true;
302        while let Some(c) = container {
303            f(c, is_first);
304            is_first = false;
305            container = self.get_parent(c)
306        }
307    }
308
309    #[inline]
310    pub fn slice_by_unicode(&self, range: impl RangeBounds<usize>) -> BytesSlice {
311        self.inner.str.lock().unwrap().slice_by_unicode(range)
312    }
313
314    #[inline]
315    pub fn slice_by_utf8(&self, range: impl RangeBounds<usize>) -> BytesSlice {
316        self.inner.str.lock().unwrap().slice_bytes(range)
317    }
318
319    #[inline]
320    pub fn slice_str_by_unicode_range(&self, range: Range<usize>) -> String {
321        let mut s = self.inner.str.lock().unwrap();
322        let s: &mut StrArena = &mut s;
323        let mut ans = String::with_capacity(range.len());
324        ans.push_str(s.slice_str_by_unicode(range));
325        ans
326    }
327
328    #[inline]
329    pub fn with_text_slice(&self, range: Range<usize>, mut f: impl FnMut(&str)) {
330        f(self.inner.str.lock().unwrap().slice_str_by_unicode(range))
331    }
332
333    #[inline]
334    pub fn get_value(&self, idx: usize) -> Option<LoroValue> {
335        self.inner.values.lock().unwrap().get(idx).cloned()
336    }
337
338    #[inline]
339    pub fn get_values(&self, range: Range<usize>) -> Vec<LoroValue> {
340        (self.inner.values.lock().unwrap()[range]).to_vec()
341    }
342
343    pub fn convert_single_op(
344        &self,
345        container: &ContainerID,
346        peer: PeerID,
347        counter: Counter,
348        lamport: Lamport,
349        content: RawOpContent,
350    ) -> Op {
351        let container = self.register_container(container);
352        self.inner_convert_op(content, peer, counter, lamport, container)
353    }
354
355    pub fn can_import_snapshot(&self) -> bool {
356        self.inner.str.lock().unwrap().is_empty() && self.inner.values.lock().unwrap().is_empty()
357    }
358
359    fn inner_convert_op(
360        &self,
361        content: RawOpContent<'_>,
362        _peer: PeerID,
363        counter: i32,
364        _lamport: Lamport,
365        container: ContainerIdx,
366    ) -> Op {
367        match content {
368            crate::op::RawOpContent::Map(MapSet { key, value }) => Op {
369                counter,
370                container,
371                content: crate::op::InnerContent::Map(MapSet { key, value }),
372            },
373            crate::op::RawOpContent::List(list) => match list {
374                ListOp::Insert { slice, pos } => match slice {
375                    ListSlice::RawData(values) => {
376                        let range = self.alloc_values(values.iter().cloned());
377                        Op {
378                            counter,
379                            container,
380                            content: crate::op::InnerContent::List(InnerListOp::Insert {
381                                slice: SliceRange::from(range.start as u32..range.end as u32),
382                                pos,
383                            }),
384                        }
385                    }
386                    ListSlice::RawStr { str, unicode_len } => {
387                        let (slice, info) = self.alloc_str_with_slice(&str);
388                        Op {
389                            counter,
390                            container,
391                            content: crate::op::InnerContent::List(InnerListOp::InsertText {
392                                slice,
393                                unicode_start: info.start as u32,
394                                unicode_len: unicode_len as u32,
395                                pos: pos as u32,
396                            }),
397                        }
398                    }
399                },
400                ListOp::Delete(span) => Op {
401                    counter,
402                    container,
403                    content: crate::op::InnerContent::List(InnerListOp::Delete(span)),
404                },
405                ListOp::StyleStart {
406                    start,
407                    end,
408                    info,
409                    key,
410                    value,
411                } => Op {
412                    counter,
413                    container,
414                    content: InnerContent::List(InnerListOp::StyleStart {
415                        start,
416                        end,
417                        key,
418                        info,
419                        value,
420                    }),
421                },
422                ListOp::StyleEnd => Op {
423                    counter,
424                    container,
425                    content: InnerContent::List(InnerListOp::StyleEnd),
426                },
427                ListOp::Move {
428                    from,
429                    to,
430                    elem_id: from_id,
431                } => Op {
432                    counter,
433                    container,
434                    content: InnerContent::List(InnerListOp::Move {
435                        from,
436                        to,
437                        elem_id: from_id,
438                    }),
439                },
440                ListOp::Set { elem_id, value } => Op {
441                    counter,
442                    container,
443                    content: InnerContent::List(InnerListOp::Set { elem_id, value }),
444                },
445            },
446            crate::op::RawOpContent::Tree(tree) => Op {
447                counter,
448                container,
449                content: crate::op::InnerContent::Tree(tree.clone()),
450            },
451            #[cfg(feature = "counter")]
452            crate::op::RawOpContent::Counter(c) => Op {
453                counter,
454                container,
455                content: crate::op::InnerContent::Future(crate::op::FutureInnerContent::Counter(c)),
456            },
457            crate::op::RawOpContent::Unknown { prop, value } => Op {
458                counter,
459                container,
460                content: crate::op::InnerContent::Future(crate::op::FutureInnerContent::Unknown {
461                    prop,
462                    value: Box::new(value),
463                }),
464            },
465        }
466    }
467
468    #[inline]
469    pub fn convert_raw_op(&self, op: &RawOp) -> Op {
470        self.inner_convert_op(
471            op.content.clone(),
472            op.id.peer,
473            op.id.counter,
474            op.lamport,
475            op.container,
476        )
477    }
478
479    #[inline]
480    pub fn export_containers(&self) -> Vec<ContainerID> {
481        self.inner.container_idx_to_id.lock().unwrap().clone()
482    }
483
484    pub fn export_parents(&self) -> Vec<Option<ContainerIdx>> {
485        let parents = self.inner.parents.lock().unwrap();
486        let containers = self.inner.container_idx_to_id.lock().unwrap();
487        containers
488            .iter()
489            .enumerate()
490            .map(|(x, id)| {
491                let idx = ContainerIdx::from_index_and_type(x as u32, id.container_type());
492                let parent_idx = parents.get(&idx)?;
493                *parent_idx
494            })
495            .collect()
496    }
497
498    #[inline]
499    pub fn root_containers(&self) -> Vec<ContainerIdx> {
500        self.inner.root_c_idx.lock().unwrap().clone()
501    }
502
503    // TODO: this can return a u16 directly now, since the depths are always valid
504    pub(crate) fn get_depth(&self, container: ContainerIdx) -> Option<NonZeroU16> {
505        get_depth(
506            container,
507            &mut self.inner.depth.lock().unwrap(),
508            &self.inner.parents.lock().unwrap(),
509        )
510    }
511
512    pub(crate) fn iter_value_slice(
513        &self,
514        range: Range<usize>,
515    ) -> impl Iterator<Item = LoroValue> + '_ {
516        let values = self.inner.values.lock().unwrap();
517        range
518            .into_iter()
519            .map(move |i| values.get(i).unwrap().clone())
520    }
521
522    pub(crate) fn get_root_container_idx_by_key(
523        &self,
524        root_index: &loro_common::InternalString,
525    ) -> Option<ContainerIdx> {
526        let inner = self.inner.container_id_to_idx.lock().unwrap();
527        for t in loro_common::ContainerType::ALL_TYPES.iter() {
528            let cid = ContainerID::Root {
529                name: root_index.clone(),
530                container_type: *t,
531            };
532            if let Some(idx) = inner.get(&cid) {
533                return Some(*idx);
534            }
535        }
536        None
537    }
538
539    #[allow(unused)]
540    pub(crate) fn log_all_values(&self) {
541        let values = self.inner.values.lock().unwrap();
542        for (i, v) in values.iter().enumerate() {
543            tracing::trace!("value {} {:?}", i, v);
544        }
545    }
546}
547
548fn _alloc_str_with_slice(
549    text_lock: &mut MutexGuard<'_, StrArena>,
550    str: &str,
551) -> (BytesSlice, StrAllocResult) {
552    let start = text_lock.len_bytes();
553    let ans = _alloc_str(text_lock, str);
554    (text_lock.slice_bytes(start..), ans)
555}
556
557fn _alloc_values(
558    values_lock: &mut MutexGuard<'_, Vec<LoroValue>>,
559    values: impl Iterator<Item = LoroValue>,
560) -> Range<usize> {
561    values_lock.reserve(values.size_hint().0);
562    let start = values_lock.len();
563    for value in values {
564        values_lock.push(value);
565    }
566
567    start..values_lock.len()
568}
569
570fn _alloc_value(values_lock: &mut MutexGuard<'_, Vec<LoroValue>>, value: LoroValue) -> usize {
571    values_lock.push(value);
572    values_lock.len() - 1
573}
574
575fn _alloc_str(text_lock: &mut MutexGuard<'_, StrArena>, str: &str) -> StrAllocResult {
576    let start = text_lock.len_unicode();
577    text_lock.alloc(str);
578    StrAllocResult {
579        start,
580        end: text_lock.len_unicode(),
581    }
582}
583
584fn _slice_str(range: Range<usize>, s: &mut StrArena) -> String {
585    let mut ans = String::with_capacity(range.len());
586    ans.push_str(s.slice_str_by_unicode(range));
587    ans
588}
589
590fn get_depth(
591    target: ContainerIdx,
592    depth: &mut Vec<Option<NonZeroU16>>,
593    parents: &FxHashMap<ContainerIdx, Option<ContainerIdx>>,
594) -> Option<NonZeroU16> {
595    let mut d = depth[target.to_index() as usize];
596    if d.is_some() {
597        return d;
598    }
599
600    let parent = parents.get(&target)?;
601    match parent {
602        Some(p) => {
603            d = NonZeroU16::new(get_depth(*p, depth, parents)?.get() + 1);
604            depth[target.to_index() as usize] = d;
605        }
606        None => {
607            depth[target.to_index() as usize] = NonZeroU16::new(1);
608            d = NonZeroU16::new(1);
609        }
610    }
611
612    d
613}