fory_core/
buffer.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::error::Error;
19use crate::meta::buffer_rw_string::read_latin1_simd;
20use byteorder::{ByteOrder, LittleEndian, WriteBytesExt};
21use std::cmp::max;
22
/// Threshold for using SIMD optimizations in string operations.
/// For buffers smaller than this, direct copy is faster than SIMD setup overhead.
///
/// `Reader::read_latin1_string` compares the requested byte length against this
/// value: shorter inputs are decoded inline, the rest go to `read_latin1_simd`.
const SIMD_THRESHOLD: usize = 128;
26
/// Binary writer over a caller-owned `Vec<u8>`.
///
/// Fixed-width values are appended in little-endian byte order.
pub struct Writer<'a> {
    /// Destination buffer that the `write_*` methods append to.
    pub(crate) bf: &'a mut Vec<u8>,
}
30impl<'a> Writer<'a> {
31    #[inline(always)]
32    pub fn from_buffer(bf: &'a mut Vec<u8>) -> Writer<'a> {
33        Writer { bf }
34    }
35
36    #[inline(always)]
37    pub fn dump(&self) -> Vec<u8> {
38        self.bf.clone()
39    }
40
41    #[inline(always)]
42    pub fn reset(&mut self) {
43        self.bf.clear();
44    }
45
46    #[inline(always)]
47    pub fn len(&self) -> usize {
48        self.bf.len()
49    }
50
51    #[inline(always)]
52    pub fn is_empty(&self) -> bool {
53        self.bf.is_empty()
54    }
55
56    #[inline(always)]
57    pub fn reserve(&mut self, additional: usize) {
58        if self.bf.capacity() - self.len() < additional {
59            self.bf.reserve(max(additional * 2, self.bf.capacity()));
60        }
61    }
62
63    #[inline(always)]
64    pub fn skip(&mut self, len: usize) {
65        self.bf.resize(self.bf.len() + len, 0);
66    }
67
68    #[inline(always)]
69    pub fn set_bytes(&mut self, offset: usize, data: &[u8]) {
70        self.bf
71            .get_mut(offset..offset + data.len())
72            .unwrap()
73            .copy_from_slice(data);
74    }
75
76    #[inline(always)]
77    pub fn write_bytes(&mut self, v: &[u8]) -> usize {
78        self.bf.extend_from_slice(v);
79        v.len()
80    }
81
82    #[inline(always)]
83    pub fn write_u8(&mut self, value: u8) {
84        self.bf.write_u8(value).unwrap();
85    }
86
87    #[inline(always)]
88    pub fn write_i8(&mut self, value: i8) {
89        self.bf.write_i8(value).unwrap();
90    }
91
92    #[inline(always)]
93    pub fn write_u16(&mut self, value: u16) {
94        self.bf.write_u16::<LittleEndian>(value).unwrap();
95    }
96
97    #[inline(always)]
98    pub fn write_i16(&mut self, value: i16) {
99        self.bf.write_i16::<LittleEndian>(value).unwrap();
100    }
101
102    #[inline(always)]
103    pub fn write_u32(&mut self, value: u32) {
104        self.bf.write_u32::<LittleEndian>(value).unwrap();
105    }
106
107    #[inline(always)]
108    pub fn write_i32(&mut self, value: i32) {
109        self.bf.write_i32::<LittleEndian>(value).unwrap();
110    }
111
112    #[inline(always)]
113    pub fn write_f32(&mut self, value: f32) {
114        self.bf.write_f32::<LittleEndian>(value).unwrap();
115    }
116
117    #[inline(always)]
118    pub fn write_i64(&mut self, value: i64) {
119        self.bf.write_i64::<LittleEndian>(value).unwrap();
120    }
121
122    #[inline(always)]
123    pub fn write_f64(&mut self, value: f64) {
124        self.bf.write_f64::<LittleEndian>(value).unwrap();
125    }
126
127    #[inline(always)]
128    pub fn write_u64(&mut self, value: u64) {
129        self.bf.write_u64::<LittleEndian>(value).unwrap();
130    }
131
132    #[inline(always)]
133    pub fn write_varint32(&mut self, value: i32) {
134        let zigzag = ((value as i64) << 1) ^ ((value as i64) >> 31);
135        self._write_varuint32(zigzag as u32)
136    }
137
138    #[inline(always)]
139    pub fn write_varuint32(&mut self, value: u32) {
140        self._write_varuint32(value)
141    }
142
143    #[inline(always)]
144    fn _write_varuint32(&mut self, value: u32) {
145        if value < 0x80 {
146            self.write_u8(value as u8);
147        } else if value < 0x4000 {
148            // 2 bytes
149            let u1 = ((value as u8) & 0x7F) | 0x80;
150            let u2 = (value >> 7) as u8;
151            self.write_u16(((u2 as u16) << 8) | u1 as u16);
152        } else if value < 0x200000 {
153            // 3 bytes
154            let u1 = ((value as u8) & 0x7F) | 0x80;
155            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
156            let u3 = (value >> 14) as u8;
157            self.write_u16(((u2 as u16) << 8) | u1 as u16);
158            self.write_u8(u3);
159        } else if value < 0x10000000 {
160            // 4 bytes
161            let u1 = ((value as u8) & 0x7F) | 0x80;
162            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
163            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
164            let u4 = (value >> 21) as u8;
165            self.write_u32(
166                ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
167            );
168        } else {
169            // 5 bytes
170            let u1 = ((value as u8) & 0x7F) | 0x80;
171            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
172            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
173            let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
174            let u5 = (value >> 28) as u8;
175            self.write_u32(
176                ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
177            );
178            self.write_u8(u5);
179        }
180    }
181
182    #[inline(always)]
183    pub fn write_varint64(&mut self, value: i64) {
184        let zigzag = ((value << 1) ^ (value >> 63)) as u64;
185        self._write_varuint64(zigzag);
186    }
187
188    #[inline(always)]
189    pub fn write_varuint64(&mut self, value: u64) {
190        self._write_varuint64(value);
191    }
192
193    #[inline(always)]
194    fn _write_varuint64(&mut self, value: u64) {
195        if value < 0x80 {
196            self.write_u8(value as u8);
197        } else if value < 0x4000 {
198            let u1 = ((value as u8) & 0x7F) | 0x80;
199            let u2 = (value >> 7) as u8;
200            self.write_u16(((u2 as u16) << 8) | u1 as u16);
201        } else if value < 0x200000 {
202            let u1 = ((value as u8) & 0x7F) | 0x80;
203            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
204            let u3 = (value >> 14) as u8;
205            self.write_u16(((u2 as u16) << 8) | u1 as u16);
206            self.write_u8(u3);
207        } else if value < 0x10000000 {
208            let u1 = ((value as u8) & 0x7F) | 0x80;
209            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
210            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
211            let u4 = (value >> 21) as u8;
212            self.write_u32(
213                ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
214            );
215        } else if value < 0x800000000 {
216            let u1 = ((value as u8) & 0x7F) | 0x80;
217            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
218            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
219            let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
220            let u5 = (value >> 28) as u8;
221            self.write_u32(
222                ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
223            );
224            self.write_u8(u5);
225        } else if value < 0x40000000000 {
226            let u1 = ((value as u8) & 0x7F) | 0x80;
227            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
228            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
229            let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
230            let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
231            let u6 = (value >> 35) as u8;
232            self.write_u32(
233                ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
234            );
235            self.write_u16(((u6 as u16) << 8) | u5 as u16);
236        } else if value < 0x2000000000000 {
237            let u1 = ((value as u8) & 0x7F) | 0x80;
238            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
239            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
240            let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
241            let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
242            let u6 = (((value >> 35) as u8) & 0x7F) | 0x80;
243            let u7 = (value >> 42) as u8;
244            self.write_u32(
245                ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
246            );
247            self.write_u16(((u6 as u16) << 8) | u5 as u16);
248            self.write_u8(u7);
249        } else if value < 0x100000000000000 {
250            let u1 = ((value as u8) & 0x7F) | 0x80;
251            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
252            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
253            let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
254            let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
255            let u6 = (((value >> 35) as u8) & 0x7F) | 0x80;
256            let u7 = (((value >> 42) as u8) & 0x7F) | 0x80;
257            let u8 = (value >> 49) as u8;
258            self.write_u64(
259                (u8 as u64) << 56
260                    | (u7 as u64) << 48
261                    | (u6 as u64) << 40
262                    | (u5 as u64) << 32
263                    | (u4 as u64) << 24
264                    | (u3 as u64) << 16
265                    | (u2 as u64) << 8
266                    | (u1 as u64),
267            );
268        } else {
269            let u1 = ((value as u8) & 0x7F) | 0x80;
270            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
271            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
272            let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
273            let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
274            let u6 = (((value >> 35) as u8) & 0x7F) | 0x80;
275            let u7 = (((value >> 42) as u8) & 0x7F) | 0x80;
276            let u8 = (((value >> 49) as u8) & 0x7F) | 0x80;
277            let u9 = (value >> 56) as u8;
278            self.write_u64(
279                (u8 as u64) << 56
280                    | (u7 as u64) << 48
281                    | (u6 as u64) << 40
282                    | (u5 as u64) << 32
283                    | (u4 as u64) << 24
284                    | (u3 as u64) << 16
285                    | (u2 as u64) << 8
286                    | (u1 as u64),
287            );
288            self.write_u8(u9);
289        }
290    }
291
292    #[inline(always)]
293    pub fn write_varuint36_small(&mut self, value: u64) {
294        assert!(value < (1u64 << 36), "value too large for 36-bit varint");
295        if value < 0x80 {
296            self.write_u8(value as u8);
297        } else if value < 0x4000 {
298            let b0 = ((value & 0x7F) as u8) | 0x80;
299            let b1 = (value >> 7) as u8;
300            let combined = ((b1 as u16) << 8) | (b0 as u16);
301            self.write_u16(combined);
302        } else if value < 0x200000 {
303            let b0 = (value & 0x7F) | 0x80;
304            let b1 = ((value >> 7) & 0x7F) | 0x80;
305            let b2 = value >> 14;
306            let combined = b0 | (b1 << 8) | (b2 << 16);
307            self.write_u32(combined as u32);
308        } else if value < 0x10000000 {
309            let b0 = (value & 0x7F) | 0x80;
310            let b1 = ((value >> 7) & 0x7F) | 0x80;
311            let b2 = ((value >> 14) & 0x7F) | 0x80;
312            let b3 = value >> 21;
313            let combined = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
314            self.write_u32(combined as u32);
315        } else {
316            let b0 = (value & 0x7F) | 0x80;
317            let b1 = ((value >> 7) & 0x7F) | 0x80;
318            let b2 = ((value >> 14) & 0x7F) | 0x80;
319            let b3 = ((value >> 21) & 0x7F) | 0x80;
320            let b4 = value >> 28;
321            let combined = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24) | (b4 << 32);
322            self.write_u64(combined);
323        }
324    }
325
326    #[inline(always)]
327    pub fn write_utf8_string(&mut self, s: &str) {
328        let bytes = s.as_bytes();
329        let len = bytes.len();
330        self.bf.reserve(len);
331        self.bf.extend_from_slice(bytes);
332    }
333}
334
/// Cursor-based reader over a borrowed byte slice.
///
/// Fixed-width values are decoded as little-endian.
#[derive(Default)]
#[allow(clippy::needless_lifetimes)]
pub struct Reader<'a> {
    /// The bytes being read.
    pub(crate) bf: &'a [u8],
    /// Index into `bf` of the next byte to read.
    pub(crate) cursor: usize,
}
341
342#[allow(clippy::needless_lifetimes)]
343impl<'a> Reader<'a> {
344    #[inline(always)]
345    pub fn new(bf: &[u8]) -> Reader<'_> {
346        Reader { bf, cursor: 0 }
347    }
348
349    #[inline(always)]
350    pub(crate) fn move_next(&mut self, additional: usize) {
351        self.cursor += additional;
352    }
353
354    #[inline(always)]
355    pub(crate) fn move_back(&mut self, additional: usize) {
356        self.cursor -= additional;
357    }
358
359    #[inline(always)]
360    pub fn sub_slice(&self, start: usize, end: usize) -> Result<&[u8], Error> {
361        if start >= self.bf.len() || end > self.bf.len() || end < start {
362            Err(Error::buffer_out_of_bound(
363                start,
364                self.bf.len(),
365                self.bf.len(),
366            ))
367        } else {
368            Ok(&self.bf[start..end])
369        }
370    }
371
372    #[inline(always)]
373    pub fn slice_after_cursor(&self) -> &[u8] {
374        &self.bf[self.cursor..]
375    }
376
377    #[inline(always)]
378    pub fn get_cursor(&self) -> usize {
379        self.cursor
380    }
381
382    #[inline(always)]
383    fn value_at(&self, index: usize) -> Result<u8, Error> {
384        match self.bf.get(index) {
385            None => Err(Error::buffer_out_of_bound(
386                index,
387                self.bf.len(),
388                self.bf.len(),
389            )),
390            Some(v) => Ok(*v),
391        }
392    }
393
394    #[inline(always)]
395    fn check_bound(&self, n: usize) -> Result<(), Error> {
396        // The upper layer guarantees it is non-null
397        // if self.bf.is_null() {
398        //     return Err(Error::invalid_data("buffer pointer is null"));
399        // }
400        if self.cursor + n > self.bf.len() {
401            Err(Error::buffer_out_of_bound(self.cursor, n, self.bf.len()))
402        } else {
403            Ok(())
404        }
405    }
406
407    #[inline(always)]
408    pub fn read_bool(&mut self) -> Result<bool, Error> {
409        Ok(self.read_u8()? != 0)
410    }
411
412    #[inline(always)]
413    pub fn read_u8_uncheck(&mut self) -> u8 {
414        let result = unsafe { self.bf.get_unchecked(self.cursor) };
415        self.move_next(1);
416        *result
417    }
418
419    #[inline(always)]
420    pub fn read_u8(&mut self) -> Result<u8, Error> {
421        let result = self.value_at(self.cursor)?;
422        self.move_next(1);
423        Ok(result)
424    }
425
426    #[inline(always)]
427    pub fn read_i8(&mut self) -> Result<i8, Error> {
428        Ok(self.read_u8()? as i8)
429    }
430
431    #[inline(always)]
432    pub fn read_u16(&mut self) -> Result<u16, Error> {
433        let slice = self.slice_after_cursor();
434        let result = LittleEndian::read_u16(slice);
435        self.move_next(2);
436        Ok(result)
437    }
438
439    #[inline(always)]
440    pub fn read_i16(&mut self) -> Result<i16, Error> {
441        Ok(self.read_u16()? as i16)
442    }
443
444    #[inline(always)]
445    pub fn read_u32(&mut self) -> Result<u32, Error> {
446        let slice = self.slice_after_cursor();
447        let result = LittleEndian::read_u32(slice);
448        self.move_next(4);
449        Ok(result)
450    }
451
452    #[inline(always)]
453    pub fn read_i32(&mut self) -> Result<i32, Error> {
454        Ok(self.read_u32()? as i32)
455    }
456
457    #[inline(always)]
458    pub fn read_u64(&mut self) -> Result<u64, Error> {
459        let slice = self.slice_after_cursor();
460        let result = LittleEndian::read_u64(slice);
461        self.move_next(8);
462        Ok(result)
463    }
464
465    #[inline(always)]
466    pub fn read_i64(&mut self) -> Result<i64, Error> {
467        Ok(self.read_u64()? as i64)
468    }
469
470    #[inline(always)]
471    pub fn read_f32(&mut self) -> Result<f32, Error> {
472        let slice = self.slice_after_cursor();
473        let result = LittleEndian::read_f32(slice);
474        self.move_next(4);
475        Ok(result)
476    }
477
478    #[inline(always)]
479    pub fn read_f64(&mut self) -> Result<f64, Error> {
480        let slice = self.slice_after_cursor();
481        let result = LittleEndian::read_f64(slice);
482        self.move_next(8);
483        Ok(result)
484    }
485
486    #[inline(always)]
487    pub fn read_varuint32(&mut self) -> Result<u32, Error> {
488        let b0 = self.value_at(self.cursor)? as u32;
489        if b0 < 0x80 {
490            self.move_next(1);
491            return Ok(b0);
492        }
493
494        let b1 = self.value_at(self.cursor + 1)? as u32;
495        let mut encoded = (b0 & 0x7F) | ((b1 & 0x7F) << 7);
496        if b1 < 0x80 {
497            self.move_next(2);
498            return Ok(encoded);
499        }
500
501        let b2 = self.value_at(self.cursor + 2)? as u32;
502        encoded |= (b2 & 0x7F) << 14;
503        if b2 < 0x80 {
504            self.move_next(3);
505            return Ok(encoded);
506        }
507
508        let b3 = self.value_at(self.cursor + 3)? as u32;
509        encoded |= (b3 & 0x7F) << 21;
510        if b3 < 0x80 {
511            self.move_next(4);
512            return Ok(encoded);
513        }
514
515        let b4 = self.value_at(self.cursor + 4)? as u32;
516        encoded |= b4 << 28;
517        self.move_next(5);
518        Ok(encoded)
519    }
520
521    #[inline(always)]
522    pub fn read_varint32(&mut self) -> Result<i32, Error> {
523        let encoded = self.read_varuint32()?;
524        Ok(((encoded >> 1) as i32) ^ -((encoded & 1) as i32))
525    }
526
527    #[inline(always)]
528    pub fn read_varuint64(&mut self) -> Result<u64, Error> {
529        let b0 = self.value_at(self.cursor)? as u64;
530        if b0 < 0x80 {
531            self.move_next(1);
532            return Ok(b0);
533        }
534
535        let b1 = self.value_at(self.cursor + 1)? as u64;
536        let mut var64 = (b0 & 0x7F) | ((b1 & 0x7F) << 7);
537        if b1 < 0x80 {
538            self.move_next(2);
539            return Ok(var64);
540        }
541
542        let b2 = self.value_at(self.cursor + 2)? as u64;
543        var64 |= (b2 & 0x7F) << 14;
544        if b2 < 0x80 {
545            self.move_next(3);
546            return Ok(var64);
547        }
548
549        let b3 = self.value_at(self.cursor + 3)? as u64;
550        var64 |= (b3 & 0x7F) << 21;
551        if b3 < 0x80 {
552            self.move_next(4);
553            return Ok(var64);
554        }
555
556        let b4 = self.value_at(self.cursor + 4)? as u64;
557        var64 |= (b4 & 0x7F) << 28;
558        if b4 < 0x80 {
559            self.move_next(5);
560            return Ok(var64);
561        }
562
563        let b5 = self.value_at(self.cursor + 5)? as u64;
564        var64 |= (b5 & 0x7F) << 35;
565        if b5 < 0x80 {
566            self.move_next(6);
567            return Ok(var64);
568        }
569
570        let b6 = self.value_at(self.cursor + 6)? as u64;
571        var64 |= (b6 & 0x7F) << 42;
572        if b6 < 0x80 {
573            self.move_next(7);
574            return Ok(var64);
575        }
576
577        let b7 = self.value_at(self.cursor + 7)? as u64;
578        var64 |= (b7 & 0x7F) << 49;
579        if b7 < 0x80 {
580            self.move_next(8);
581            return Ok(var64);
582        }
583
584        let b8 = self.value_at(self.cursor + 8)? as u64;
585        var64 |= (b8 & 0xFF) << 56;
586        self.move_next(9);
587        Ok(var64)
588    }
589
590    #[inline(always)]
591    pub fn read_varint64(&mut self) -> Result<i64, Error> {
592        let encoded = self.read_varuint64()?;
593        Ok(((encoded >> 1) as i64) ^ -((encoded & 1) as i64))
594    }
595
596    #[inline(always)]
597    pub fn read_latin1_string(&mut self, len: usize) -> Result<String, Error> {
598        self.check_bound(len)?;
599        if len < SIMD_THRESHOLD {
600            // Fast path for small buffers
601            unsafe {
602                let src = self.sub_slice(self.cursor, self.cursor + len)?;
603
604                // Check if all bytes are ASCII (< 0x80)
605                let is_ascii = src.iter().all(|&b| b < 0x80);
606
607                if is_ascii {
608                    // ASCII fast path: Latin1 == UTF-8, direct copy
609                    let mut vec = Vec::with_capacity(len);
610                    let dst = vec.as_mut_ptr();
611                    std::ptr::copy_nonoverlapping(src.as_ptr(), dst, len);
612                    vec.set_len(len);
613                    self.move_next(len);
614                    Ok(String::from_utf8_unchecked(vec))
615                } else {
616                    // Contains Latin1 bytes (0x80-0xFF): must convert to UTF-8
617                    let mut out: Vec<u8> = Vec::with_capacity(len * 2);
618                    let out_ptr = out.as_mut_ptr();
619                    let mut out_len = 0;
620
621                    for &b in src {
622                        if b < 0x80 {
623                            *out_ptr.add(out_len) = b;
624                            out_len += 1;
625                        } else {
626                            // Latin1 -> UTF-8 encoding
627                            *out_ptr.add(out_len) = 0xC0 | (b >> 6);
628                            *out_ptr.add(out_len + 1) = 0x80 | (b & 0x3F);
629                            out_len += 2;
630                        }
631                    }
632
633                    out.set_len(out_len);
634                    self.move_next(len);
635                    Ok(String::from_utf8_unchecked(out))
636                }
637            }
638        } else {
639            // Use SIMD for larger strings where the overhead is amortized
640            read_latin1_simd(self, len)
641        }
642    }
643
644    #[inline(always)]
645    pub fn read_utf8_string(&mut self, len: usize) -> Result<String, Error> {
646        self.check_bound(len)?;
647        // don't use simd for memory copy, copy_non_overlapping is faster
648        unsafe {
649            let mut vec = Vec::with_capacity(len);
650            let src = self.bf.as_ptr().add(self.cursor);
651            let dst = vec.as_mut_ptr();
652            // Use fastest possible copy - copy_nonoverlapping compiles to memcpy
653            std::ptr::copy_nonoverlapping(src, dst, len);
654            vec.set_len(len);
655            self.move_next(len);
656            // SAFETY: Assuming valid UTF-8 bytes (responsibility of serialization protocol)
657            Ok(String::from_utf8_unchecked(vec))
658        }
659    }
660
661    #[inline(always)]
662    pub fn read_utf16_string(&mut self, len: usize) -> Result<String, Error> {
663        self.check_bound(len)?;
664        let slice = self.sub_slice(self.cursor, self.cursor + len)?;
665        let units: Vec<u16> = slice
666            .chunks_exact(2)
667            .map(|c| u16::from_le_bytes([c[0], c[1]]))
668            .collect();
669        self.move_next(len);
670        Ok(String::from_utf16_lossy(&units))
671    }
672
673    #[inline(always)]
674    pub fn read_varuint36small(&mut self) -> Result<u64, Error> {
675        let start = self.cursor;
676        let slice = self.slice_after_cursor();
677
678        if slice.len() >= 8 {
679            // here already check bound
680            let bulk = self.read_u64()?;
681            let mut result = bulk & 0x7F;
682            let mut read_idx = start;
683
684            if (bulk & 0x80) != 0 {
685                read_idx += 1;
686                result |= (bulk >> 1) & 0x3F80;
687                if (bulk & 0x8000) != 0 {
688                    read_idx += 1;
689                    result |= (bulk >> 2) & 0x1FC000;
690                    if (bulk & 0x800000) != 0 {
691                        read_idx += 1;
692                        result |= (bulk >> 3) & 0xFE00000;
693                        if (bulk & 0x80000000) != 0 {
694                            read_idx += 1;
695                            result |= (bulk >> 4) & 0xFF0000000;
696                        }
697                    }
698                }
699            }
700            self.cursor = read_idx + 1;
701            return Ok(result);
702        }
703
704        let mut result = 0u64;
705        let mut shift = 0;
706        while self.cursor < self.bf.len() {
707            let b = self.read_u8_uncheck();
708            result |= ((b & 0x7F) as u64) << shift;
709            if (b & 0x80) == 0 {
710                break;
711            }
712            shift += 7;
713            if shift >= 36 {
714                return Err(Error::encode_error("varuint36small overflow"));
715            }
716        }
717        Ok(result)
718    }
719
720    #[inline(always)]
721    pub fn skip(&mut self, len: usize) -> Result<(), Error> {
722        self.check_bound(len)?;
723        self.move_next(len);
724        Ok(())
725    }
726
727    #[inline(always)]
728    pub fn read_bytes(&mut self, len: usize) -> Result<&[u8], Error> {
729        self.check_bound(len)?;
730        let result = &self.bf[self.cursor..self.cursor + len];
731        self.move_next(len);
732        Ok(result)
733    }
734
735    #[inline(always)]
736    pub fn reset_cursor_to_here(&self) -> impl FnOnce(&mut Self) {
737        let raw_cursor = self.cursor;
738        move |this: &mut Self| {
739            this.cursor = raw_cursor;
740        }
741    }
742
743    pub fn set_cursor(&mut self, cursor: usize) {
744        self.cursor = cursor;
745    }
746}
747
748#[allow(clippy::needless_lifetimes)]
749unsafe impl<'a> Send for Reader<'a> {}
750#[allow(clippy::needless_lifetimes)]
751unsafe impl<'a> Sync for Reader<'a> {}