yrs/updates/
decoder.rs

1use crate::block::ClientID;
2use crate::encoding::read::{Cursor, Error, Read};
3use crate::*;
4use std::sync::Arc;
5
6/// A trait that can be implemented by any other type in order to support lib0 decoding capability.
7pub trait Decode: Sized {
8    fn decode<D: Decoder>(decoder: &mut D) -> Result<Self, Error>;
9
10    /// Helper function for decoding 1st version of lib0 encoding.
11    fn decode_v1(data: &[u8]) -> Result<Self, Error> {
12        let mut decoder = DecoderV1::from(data);
13        Self::decode(&mut decoder)
14    }
15
16    /// Helper function for decoding 2nd version of lib0 encoding.
17    fn decode_v2(data: &[u8]) -> Result<Self, Error> {
18        let mut decoder = DecoderV2::new(Cursor::new(data))?;
19        Self::decode(&mut decoder)
20    }
21}
22
23/// Trait used by lib0 decoders. Natively lib0 encoding supports two versions:
24///
25/// 1. 1st version (implemented in Yrs) uses simple optimization techniques like var int encoding.
26/// 2. 2nd version optimizes bigger batches of blocks by using run-length encoding.
27///
28/// Both of these define a common set of operations defined in this trait.  
29pub trait Decoder: Read {
30    /// Reset the value of current delete set state.
31    fn reset_ds_cur_val(&mut self);
32
33    /// Read next [DeleteSet] clock value.
34    fn read_ds_clock(&mut self) -> Result<u32, Error>;
35
36    /// Read the number of clients stored in encoded [DeleteSet].
37    fn read_ds_len(&mut self) -> Result<u32, Error>;
38
39    /// Read left origin of a currently decoded [Block].
40    fn read_left_id(&mut self) -> Result<ID, Error>;
41
42    /// Read right origin of a currently decoded [Block].
43    fn read_right_id(&mut self) -> Result<ID, Error>;
44
45    /// Read currently decoded client identifier.
46    fn read_client(&mut self) -> Result<ClientID, Error>;
47
48    /// Read info bit flags of a currently decoded [Block].
49    fn read_info(&mut self) -> Result<u8, Error>;
50
51    /// Read bit flags determining type of parent of a currently decoded [Block].
52    fn read_parent_info(&mut self) -> Result<bool, Error>;
53
54    /// Read type ref info of a currently decoded [Block] parent.
55    fn read_type_ref(&mut self) -> Result<u8, Error>;
56
57    /// Read length parameter.
58    fn read_len(&mut self) -> Result<u32, Error>;
59
60    /// Decode a JSON-like data type. It's a complex type which is an extension of native JavaScript
61    /// Object Notation.
62    fn read_any(&mut self) -> Result<Any, Error>;
63
64    /// Decode an embedded JSON string into [Any] struct. It's a complex type which is an extension
65    /// of native JavaScript Object Notation.
66    fn read_json(&mut self) -> Result<Any, Error>;
67
68    /// Read key string.
69    fn read_key(&mut self) -> Result<Arc<str>, Error>;
70
71    /// Consume a rest of the decoded buffer data and return it without parsing.
72    fn read_to_end(&mut self) -> Result<&[u8], Error>;
73}
74
75/// Version 1 of lib0 decoder.
76pub struct DecoderV1<'a> {
77    cursor: Cursor<'a>,
78}
79
80impl<'a> DecoderV1<'a> {
81    pub fn new(cursor: Cursor<'a>) -> Self {
82        DecoderV1 { cursor }
83    }
84
85    fn read_id(&mut self) -> Result<ID, Error> {
86        let client: u32 = self.read_var()?;
87        let clock = self.read_var()?;
88        Ok(ID::new(client as ClientID, clock))
89    }
90}
91
92impl<'a> From<Cursor<'a>> for DecoderV1<'a> {
93    fn from(cursor: Cursor<'a>) -> Self {
94        Self::new(cursor)
95    }
96}
97
98impl<'a> From<&'a [u8]> for DecoderV1<'a> {
99    fn from(buf: &'a [u8]) -> Self {
100        Self::new(Cursor::new(buf))
101    }
102}
103
104impl<'a> Read for DecoderV1<'a> {
105    #[inline]
106    fn read_u8(&mut self) -> Result<u8, Error> {
107        self.cursor.read_u8()
108    }
109
110    #[inline]
111    fn read_exact(&mut self, len: usize) -> Result<&[u8], Error> {
112        self.cursor.read_exact(len)
113    }
114}
115
116impl<'a> Decoder for DecoderV1<'a> {
117    #[inline]
118    fn reset_ds_cur_val(&mut self) {
119        /* no op */
120    }
121
122    #[inline]
123    fn read_ds_clock(&mut self) -> Result<u32, Error> {
124        self.read_var()
125    }
126
127    #[inline]
128    fn read_ds_len(&mut self) -> Result<u32, Error> {
129        self.read_var()
130    }
131
132    #[inline]
133    fn read_left_id(&mut self) -> Result<ID, Error> {
134        self.read_id()
135    }
136
137    #[inline]
138    fn read_right_id(&mut self) -> Result<ID, Error> {
139        self.read_id()
140    }
141
142    #[inline]
143    fn read_client(&mut self) -> Result<ClientID, Error> {
144        let client: u32 = self.cursor.read_var()?;
145        Ok(client as ClientID)
146    }
147
148    #[inline]
149    fn read_info(&mut self) -> Result<u8, Error> {
150        self.cursor.read_u8()
151    }
152
153    #[inline]
154    fn read_parent_info(&mut self) -> Result<bool, Error> {
155        let info: u32 = self.cursor.read_var()?;
156        Ok(info == 1)
157    }
158
159    #[inline]
160    fn read_type_ref(&mut self) -> Result<u8, Error> {
161        // In Yjs we use read_var_uint but use only 7 bit. So this is equivalent.
162        self.cursor.read_u8()
163    }
164
165    #[inline]
166    fn read_len(&mut self) -> Result<u32, Error> {
167        self.read_var()
168    }
169
170    #[inline]
171    fn read_any(&mut self) -> Result<Any, Error> {
172        Any::decode(self)
173    }
174
175    fn read_json(&mut self) -> Result<Any, Error> {
176        let src = self.read_string()?;
177        Any::from_json(src)
178    }
179
180    #[inline]
181    fn read_key(&mut self) -> Result<Arc<str>, Error> {
182        let str: Arc<str> = self.read_string()?.into();
183        Ok(str)
184    }
185
186    #[inline]
187    fn read_to_end(&mut self) -> Result<&[u8], Error> {
188        Ok(&self.cursor.buf[self.cursor.next..])
189    }
190}
191
192/// Version 2 of lib0 decoder.
193pub struct DecoderV2<'a> {
194    cursor: Cursor<'a>,
195    keys: Vec<Arc<str>>,
196    ds_curr_val: u32,
197    key_clock_decoder: IntDiffOptRleDecoder<'a>,
198    client_decoder: UIntOptRleDecoder<'a>,
199    left_clock_decoder: IntDiffOptRleDecoder<'a>,
200    right_clock_decoder: IntDiffOptRleDecoder<'a>,
201    info_decoder: RleDecoder<'a>,
202    string_decoder: StringDecoder<'a>,
203    parent_info_decoder: RleDecoder<'a>,
204    type_ref_decoder: UIntOptRleDecoder<'a>,
205    len_decoder: UIntOptRleDecoder<'a>,
206}
207
208impl<'a> DecoderV2<'a> {
209    pub fn new(mut cursor: Cursor<'a>) -> Result<Self, Error> {
210        if cursor.has_content() {
211            // read feature flag - currently unused
212            let _: u8 = cursor.read_u8()?;
213        }
214        let mut idx = cursor.next;
215        let buf = cursor.buf;
216
217        let key_clock_buf = Self::read_buf(buf, &mut idx)?;
218        let client_buf = Self::read_buf(buf, &mut idx)?;
219        let left_clock_buf = Self::read_buf(buf, &mut idx)?;
220        let right_clock_buf = Self::read_buf(buf, &mut idx)?;
221        let info_buf = Self::read_buf(buf, &mut idx)?;
222        let string_buf = Self::read_buf(buf, &mut idx)?;
223        let parent_info_buf = Self::read_buf(buf, &mut idx)?;
224        let type_ref_buf = Self::read_buf(buf, &mut idx)?;
225        let len_buf = Self::read_buf(buf, &mut idx)?;
226        let cursor = Cursor {
227            buf: &buf[idx..],
228            next: 0,
229        };
230        Ok(DecoderV2 {
231            cursor,
232            ds_curr_val: 0,
233            keys: Vec::new(),
234            key_clock_decoder: IntDiffOptRleDecoder::new(Cursor::new(key_clock_buf)),
235            client_decoder: UIntOptRleDecoder::new(Cursor::new(client_buf)),
236            left_clock_decoder: IntDiffOptRleDecoder::new(Cursor::new(left_clock_buf)),
237            right_clock_decoder: IntDiffOptRleDecoder::new(Cursor::new(right_clock_buf)),
238            info_decoder: RleDecoder::new(Cursor::new(info_buf)),
239            string_decoder: StringDecoder::new(Cursor::new(string_buf))?,
240            parent_info_decoder: RleDecoder::new(Cursor::new(parent_info_buf)),
241            type_ref_decoder: UIntOptRleDecoder::new(Cursor::new(type_ref_buf)),
242            len_decoder: UIntOptRleDecoder::new(Cursor::new(len_buf)),
243        })
244    }
245
246    fn read_usize(buf: &[u8], idx: &mut usize) -> Result<usize, Error> {
247        if *idx >= buf.len() {
248            return Err(Error::InvalidVarInt);
249        }
250        let mut num: usize = 0;
251        let mut len: usize = 0;
252        loop {
253            let r = buf[*idx];
254            *idx += 1;
255            num |= (r as usize & 127) << len;
256            len += 7;
257            if r < 128 {
258                return Ok(num);
259            }
260            if len > 128 {
261                return Err(Error::InvalidVarInt);
262            }
263        }
264    }
265
266    fn read_buf(buf: &'a [u8], idx: &mut usize) -> Result<&'a [u8], Error> {
267        let len = Self::read_usize(buf, idx)?;
268        let start = *idx;
269        let end = start + len;
270        if end <= buf.len() {
271            let slice = &buf[start..end];
272            *idx += len as usize;
273            Ok(slice)
274        } else {
275            Err(Error::EndOfBuffer(len))
276        }
277    }
278}
279
280impl<'a> Read for DecoderV2<'a> {
281    #[inline]
282    fn read_exact(&mut self, len: usize) -> Result<&[u8], Error> {
283        self.cursor.read_exact(len)
284    }
285
286    #[inline]
287    fn read_u8(&mut self) -> Result<u8, Error> {
288        self.cursor.read_u8()
289    }
290
291    #[inline]
292    fn read_string(&mut self) -> Result<&str, Error> {
293        self.string_decoder.read_str()
294    }
295}
296
297impl<'a> Decoder for DecoderV2<'a> {
298    fn reset_ds_cur_val(&mut self) {
299        self.ds_curr_val = 0;
300    }
301
302    fn read_ds_clock(&mut self) -> Result<u32, Error> {
303        self.ds_curr_val += self.cursor.read_var::<u32>()?;
304        Ok(self.ds_curr_val)
305    }
306
307    fn read_ds_len(&mut self) -> Result<u32, Error> {
308        let diff = self.cursor.read_var::<u32>()? + 1;
309        self.ds_curr_val += diff;
310        Ok(diff)
311    }
312
313    fn read_left_id(&mut self) -> Result<ID, Error> {
314        Ok(ID::new(
315            self.client_decoder.read_u64()? as ClientID,
316            self.left_clock_decoder.read_u32()?,
317        ))
318    }
319
320    fn read_right_id(&mut self) -> Result<ID, Error> {
321        Ok(ID::new(
322            self.client_decoder.read_u64()? as ClientID,
323            self.right_clock_decoder.read_u32()?,
324        ))
325    }
326
327    fn read_client(&mut self) -> Result<ClientID, Error> {
328        Ok(self.client_decoder.read_u64()? as ClientID)
329    }
330
331    fn read_info(&mut self) -> Result<u8, Error> {
332        self.info_decoder.read_u8()
333    }
334
335    fn read_parent_info(&mut self) -> Result<bool, Error> {
336        Ok(self.parent_info_decoder.read_u8()? == 1)
337    }
338
339    fn read_type_ref(&mut self) -> Result<u8, Error> {
340        Ok(self.type_ref_decoder.read_u64()? as u8)
341    }
342
343    fn read_len(&mut self) -> Result<u32, Error> {
344        Ok(self.len_decoder.read_u64()? as u32)
345    }
346
347    fn read_any(&mut self) -> Result<Any, Error> {
348        Any::decode(&mut self.cursor)
349    }
350
351    fn read_json(&mut self) -> Result<Any, Error> {
352        Any::decode(&mut self.cursor)
353    }
354
355    fn read_key(&mut self) -> Result<Arc<str>, Error> {
356        let key_clock = self.key_clock_decoder.read_u32()?;
357        if let Some(key) = self.keys.get(key_clock as usize) {
358            Ok(key.clone())
359        } else {
360            let key: Arc<str> = self.string_decoder.read_str()?.into();
361            self.keys.push(key.clone());
362            Ok(key)
363        }
364    }
365
366    #[inline]
367    fn read_to_end(&mut self) -> Result<&[u8], Error> {
368        Ok(&self.cursor.buf[self.cursor.next..])
369    }
370}
371
372struct IntDiffOptRleDecoder<'a> {
373    cursor: Cursor<'a>,
374    last: u32,
375    count: u32,
376    diff: i32,
377}
378
379impl<'a> IntDiffOptRleDecoder<'a> {
380    fn new(cursor: Cursor<'a>) -> Self {
381        IntDiffOptRleDecoder {
382            cursor,
383            last: 0,
384            count: 0,
385            diff: 0,
386        }
387    }
388
389    fn read_u32(&mut self) -> Result<u32, Error> {
390        if self.count == 0 {
391            let diff = self.cursor.read_var::<i32>()?;
392            // if the first bit is set, we read more data
393            let has_count = diff & 1;
394            self.diff = (diff >> 1) as i32;
395            self.count = if has_count != 0 {
396                self.cursor.read_var::<u32>()? + 2
397            } else {
398                1
399            };
400        }
401        self.last = ((self.last as i32) + self.diff) as u32;
402        self.count -= 1;
403        Ok(self.last)
404    }
405}
406
407struct UIntOptRleDecoder<'a> {
408    cursor: Cursor<'a>,
409    last: u64,
410    count: u32,
411}
412
413impl<'a> UIntOptRleDecoder<'a> {
414    fn new(cursor: Cursor<'a>) -> Self {
415        UIntOptRleDecoder {
416            cursor,
417            last: 0,
418            count: 0,
419        }
420    }
421
422    fn read_u64(&mut self) -> Result<u64, Error> {
423        if self.count == 0 {
424            let s = self.cursor.read_var_signed::<i64>()?;
425            // if the sign is negative, we read the count too, otherwise count is 1
426            let is_negative = s.is_negative();
427            if is_negative {
428                self.count = self.cursor.read_var::<u32>()? + 2;
429                self.last = (-s.value()) as u64;
430            } else {
431                self.count = 1;
432                self.last = s.value() as u64;
433            }
434        }
435        self.count -= 1;
436        Ok(self.last)
437    }
438}
439
440struct RleDecoder<'a> {
441    cursor: Cursor<'a>,
442    last: u8,
443    count: i32,
444}
445
446impl<'a> RleDecoder<'a> {
447    fn new(cursor: Cursor<'a>) -> Self {
448        RleDecoder {
449            cursor,
450            last: 0,
451            count: 0,
452        }
453    }
454
455    fn read_u8(&mut self) -> Result<u8, Error> {
456        if self.count == 0 {
457            self.last = self.cursor.read_u8()?;
458            if self.cursor.has_content() {
459                self.count = (self.cursor.read_var::<u32>()? as i32) + 1; // see encoder implementation for the reason why this is incremented
460            } else {
461                self.count = -1; // read the current value forever
462            }
463        }
464        self.count -= 1;
465        Ok(self.last)
466    }
467}
468
469struct StringDecoder<'a> {
470    buf: &'a str,
471    len_decoder: UIntOptRleDecoder<'a>,
472    pos: usize,
473}
474
475impl<'a> StringDecoder<'a> {
476    fn new(cursor: Cursor<'a>) -> Result<Self, Error> {
477        let buf = cursor.buf;
478        let mut next = cursor.next;
479        let str_bin = DecoderV2::read_buf(buf, &mut next)?;
480        let str = unsafe { std::str::from_utf8_unchecked(str_bin) };
481        let len_decoder = UIntOptRleDecoder::new(Cursor { buf, next });
482        Ok(StringDecoder {
483            pos: 0,
484            buf: str,
485            len_decoder,
486        })
487    }
488
489    fn read_str(&mut self) -> Result<&'a str, Error> {
490        let mut remaining = self.len_decoder.read_u64()? as usize;
491        let mut i = 0;
492        let start = &self.buf[self.pos..];
493        for c in start.chars() {
494            if remaining == 0 {
495                break;
496            }
497            i += c.len_utf8(); // rust uses offsets as utf-8 bytes
498            remaining -= c.len_utf16(); // but yjs provides them as utf-16
499        }
500        let result = &start[..i];
501        self.pos += i;
502        Ok(result)
503    }
504}