Skip to main content

hexane/
raw.rs

1use super::aggregate::Acc;
2use super::columndata::ColumnData;
3use super::cursor::{ColumnCursor, HasPos, Run};
4use super::encoder::{Encoder, SpliceEncoder};
5use super::pack::PackError;
6use super::slab::{self, Slab, SlabTree, SlabWeight, SlabWriter, SpanWeight};
7use super::Cow;
8
9use std::fmt::Debug;
10use std::ops::Range;
11
/// Cursor state for a raw (uncompressed) byte column.
///
/// The const parameter `B` is the value this cursor type reports from its
/// `slab_size()` implementation. The only state carried is a byte offset
/// into the slab being traversed.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct RawCursorInternal<const B: usize> {
    /// Offset of the next unread byte; `0` for a default-constructed cursor.
    offset: usize,
}
16
/// A [`ColumnCursor`] for uncompressed raw byte columns.
///
/// Unlike other cursors, `RawCursor` stores data as-is without any RLE or delta encoding.
/// Iteration yields one-byte `[u8]` slices at a time.
///
/// This alias fixes the const parameter `B` of [`RawCursorInternal`] — the value that
/// cursor reports from `slab_size()` — to 4096 bytes.
pub type RawCursor = RawCursorInternal<4096>;
22
23impl<const B: usize> ColumnCursor for RawCursorInternal<B> {
24    type Item = [u8];
25    type State<'a> = ();
26    type PostState<'a> = Range<usize>; //&'a [u8];
27    type Export = u8;
28    type SlabIndex = SlabWeight;
29
30    fn empty() -> Self {
31        Self::default()
32    }
33
34    fn finish<'a>(_slab: &'a Slab, _writer: &mut SlabWriter<'a, [u8]>, _cursor: Self) {}
35
36    fn finalize_state<'a>(
37        slab: &'a Slab,
38        encoder: &mut Encoder<'a, Self>,
39        post: Self::PostState<'a>,
40        _cursor: Self,
41    ) -> Option<Self> {
42        let len = post.end - post.start;
43        encoder
44            .writer
45            .copy(slab.as_slice(), post, 0, len, Acc::new(), None);
46        None
47    }
48
49    fn copy_between<'a>(
50        _slab: &'a [u8],
51        _writer: &mut SlabWriter<'a, [u8]>,
52        _c0: Self,
53        _c1: Self,
54        _run: Run<'a, [u8]>,
55        _size: usize,
56    ) -> Self::State<'a> {
57        // only called from write and we override that
58    }
59
60    fn slab_size() -> usize {
61        B
62    }
63
64    fn splice_encoder(index: usize, del: usize, slab: &Slab) -> SpliceEncoder<'_, Self> {
65        let state = ();
66        let cursor = Self { offset: index };
67        let bytes = slab.as_slice();
68
69        // everything before...
70        let mut current = SlabWriter::new(B, false);
71        current.copy(bytes, 0..index, 0, index, Acc::new(), None);
72
73        let post;
74        let deleted;
75        if index + del < bytes.len() {
76            // everything after
77            post = (index + del)..(bytes.len());
78            deleted = del;
79        } else {
80            // nothing left
81            post = 0..0;
82            deleted = bytes.len() - index;
83        }
84        let overflow = del - deleted;
85        let acc = Acc::new();
86
87        SpliceEncoder {
88            encoder: Encoder::init(current, state),
89            slab,
90            post,
91            acc,
92            deleted,
93            overflow,
94            cursor,
95        }
96    }
97
98    fn export_splice<'a, I>(data: &mut Vec<Self::Export>, range: Range<usize>, values: I)
99    where
100        I: Iterator<Item = Option<Cow<'a, [u8]>>>,
101    {
102        let mut total: Vec<u8> = vec![];
103        for bytes in values.flatten() {
104            total.extend_from_slice(&bytes);
105        }
106        data.splice(range, total);
107    }
108
109    fn try_next<'a>(&mut self, slab: &'a [u8]) -> Result<Option<Run<'a, Self::Item>>, PackError> {
110        let next_offset = self.offset + 1;
111        if next_offset > slab.len() {
112            return Ok(None);
113        }
114        let data = &slab[self.offset..next_offset];
115        self.offset = next_offset;
116        Ok(Some(Run {
117            count: 1,
118            value: Some(Cow::Borrowed(data)),
119        }))
120    }
121
122    fn try_again<'a>(&self, slab: &'a [u8]) -> Result<Option<Run<'a, Self::Item>>, PackError> {
123        if self.offset == 0 {
124            Ok(None)
125        } else {
126            let data = &slab[(self.offset - 1)..self.offset];
127            Ok(Some(Run {
128                count: 1,
129                value: Some(Cow::Borrowed(data)),
130            }))
131        }
132    }
133
134    fn index(&self) -> usize {
135        self.offset
136    }
137
138    fn offset(&self) -> usize {
139        self.offset
140    }
141
142    fn load_with<F>(data: &[u8], _test: &F) -> Result<ColumnData<Self>, PackError>
143    where
144        F: Fn(Option<&Self::Item>) -> Option<String>,
145    {
146        let len = data.len();
147        let slab = Slab::new(data.to_vec(), len, Acc::default(), 0);
148        Ok(ColumnData::init(len, SlabTree::load([slab])))
149    }
150}
151
/// A low-level reader for sequential byte access across a multi-slab tree.
///
/// Unlike column iterators, `RawReader` reads raw bytes rather than decoded values.
/// It is used internally by Automerge to access byte fields (e.g. actor IDs, string
/// payloads) that are stored as consecutive byte ranges across slab boundaries.
/// A single `read_next` call cannot span two slabs; such a read is rejected with
/// `ReadRawError::CrossBoundary`.
///
/// Obtain a `RawReader` via [`ColumnData::raw_reader`](crate::ColumnData::raw_reader).
#[derive(Debug, Clone, Default)]
pub struct RawReader<'a, T: SpanWeight<Slab> + HasPos> {
    /// Byte position of the reader: advanced by every successful read, replaced by
    /// `seek_to`, and returned by `suspend`.
    pub(crate) pos: usize,
    /// Iterator from which the next slab is pulled whenever `current` is `None`.
    pub(crate) slabs: slab::tree::SpanTreeIter<'a, Slab, T>,
    /// The slab being read together with the offset of the next unread byte in it.
    /// `None` when the next read has to start at the beginning of a new slab.
    pub(crate) current: Option<(&'a Slab, usize)>,
}
165
166impl<'a, T: SpanWeight<Slab> + HasPos> RawReader<'a, T> {
167    pub fn empty() -> RawReader<'static, T> {
168        RawReader {
169            pos: 0,
170            slabs: slab::SpanTreeIter::default(),
171            current: None,
172        }
173    }
174
175    /// Read a slice out of a set of slabs
176    ///
177    /// Returns an error if:
178    /// * The read would cross a slab boundary
179    /// * The read would go past the end of the data
180    pub fn read_next(&mut self, length: usize) -> Result<&'a [u8], ReadRawError> {
181        if length == 0 {
182            return Ok(&[]);
183        }
184        let (slab, offset) = match self.current.take() {
185            Some(state) => state,
186            None => {
187                if let Some(slab) = self.slabs.next() {
188                    (slab, 0)
189                } else {
190                    return Err(ReadRawError::EndOfData);
191                }
192            }
193        };
194        if offset + length > slab.len() {
195            return Err(ReadRawError::CrossBoundary);
196        }
197        let result = slab[offset..offset + length].as_ref();
198        let new_offset = offset + length;
199        if new_offset == slab.len() {
200            self.current = None;
201        } else {
202            self.current = Some((slab, new_offset));
203        }
204        self.pos += length;
205        Ok(result)
206    }
207
208    pub fn seek_to(&mut self, advance: usize) {
209        if let Some(slabs) = self.slabs.span_tree() {
210            let cursor = slabs.get_where_or_last(|acc, next| advance < acc.pos() + next.pos());
211            let current = Some((cursor.element, advance - cursor.weight.pos()));
212            let slabs = slab::SpanTreeIter::new(slabs, cursor);
213            let pos = advance;
214            *self = RawReader {
215                pos,
216                slabs,
217                current,
218            }
219        }
220    }
221
222    /// Returns the current byte position, which can be passed to
223    /// [`ColumnData::raw_reader`](crate::ColumnData::raw_reader) to reconstruct a `RawReader`
224    /// at the same position.
225    pub fn suspend(&self) -> usize {
226        self.pos
227    }
228}
229
/// Errors returned by [`RawReader::read_next`].
#[derive(Debug, thiserror::Error)]
pub enum ReadRawError {
    /// The requested byte range crosses a slab boundary (slabs are immutable and non-contiguous).
    #[error("attempted to read across slab boundaries")]
    CrossBoundary,
    /// A non-empty read was requested but the reader has no slab left to read from.
    #[error("attempted to read past end of data")]
    EndOfData,
}
239
#[cfg(test)]
pub(crate) mod tests {
    use super::super::columndata::ColumnData;
    use super::super::test::ColExport;
    use super::*;

    /// Builds the expected `test_dump()` value: one bracketed byte list per slab,
    /// each exported as a single `ColExport::Raw` entry.
    macro_rules! dump {
        ($([$($byte:expr),* $(,)?]),* $(,)?) => {
            vec![$(vec![ColExport::Raw(vec![$($byte),*])]),*]
        };
    }

    /// Splices into a column with a slab size of six and checks the slab layout
    /// after every step.
    #[test]
    fn column_data_raw_splice() {
        let mut col1: ColumnData<RawCursorInternal<6>> = ColumnData::new();

        // first insert into an empty column
        col1.splice(0, 0, vec![vec![1, 1, 1]]);
        assert_eq!(col1.test_dump(), dump![[1, 1, 1]]);

        // prepend
        col1.splice(0, 0, vec![vec![2, 2, 2]]);
        assert_eq!(col1.test_dump(), dump![[2, 2, 2, 1, 1, 1]]);

        // insert in the middle
        col1.splice(3, 0, vec![vec![3, 3, 3]]);
        assert_eq!(col1.test_dump(), dump![[2, 2, 2, 3, 3, 3], [1, 1, 1]]);

        // insert in the middle again
        col1.splice(3, 0, vec![vec![4, 4, 4]]);
        assert_eq!(
            col1.test_dump(),
            dump![[2, 2, 2, 4, 4, 4], [3, 3, 3], [1, 1, 1]]
        );

        // delete a single byte without inserting anything
        col1.splice::<Vec<u8>, _>(3, 1, vec![]);
        assert_eq!(
            col1.test_dump(),
            dump![[2, 2, 2, 4, 4], [3, 3, 3], [1, 1, 1]]
        );

        // delete two bytes and insert two values
        col1.splice(3, 2, vec![vec![5, 5, 5, 5, 5, 5], vec![6, 6, 6]]);
        assert_eq!(
            col1.test_dump(),
            dump![
                [2, 2, 2, 5, 5, 5, 5, 5, 5],
                [6, 6, 6],
                [3, 3, 3],
                [1, 1, 1]
            ]
        );
    }

    /// Reads three-byte groups back, first sequentially with one reader and then
    /// with a fresh reader per offset.
    #[test]
    fn raw_reader() {
        let mut col: ColumnData<RawCursorInternal<6>> = ColumnData::new();
        // Every group is prepended, so the column ends up ordered 255, 254, ..., 0.
        for n in 0..=255 {
            col.splice(0, 0, vec![vec![n, n, n]]);
        }

        // one reader, consumed front to back
        let mut reader = col.raw_reader(0);
        for m in (0..=255).rev() {
            let val = reader.read_next(3).unwrap();
            assert_eq!(&[m, m, m], val);
        }

        // a fresh reader for each group, opened at that group's byte offset
        for (group, m) in (0..=255).rev().enumerate() {
            let offset = group * 3;
            let val = col.raw_reader(offset).read_next(3).unwrap();
            assert_eq!(&[m, m, m], val);
        }
    }
}