fory_core/
buffer.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::error::Error;
19use crate::meta::buffer_rw_string::read_latin1_simd;
20use byteorder::{ByteOrder, LittleEndian};
21use std::cmp::max;
22
/// Byte-length threshold at which string decoding switches to the SIMD path.
///
/// Inputs shorter than this are decoded with a plain scalar routine, because for
/// small buffers a direct copy is faster than paying the SIMD setup overhead.
const SIMD_THRESHOLD: usize = 128;
26
/// Append-only binary writer over a caller-owned `Vec<u8>`.
///
/// Every fixed-width value is emitted in little-endian byte order, independent of
/// the host endianness. Variable-length integers use 7-bit groups with the high bit
/// of each byte acting as a continuation flag.
pub struct Writer<'a> {
    pub(crate) bf: &'a mut Vec<u8>,
}

impl<'a> Writer<'a> {
    /// Wraps `bf`; subsequent writes are appended after its current contents.
    #[inline(always)]
    pub fn from_buffer(bf: &'a mut Vec<u8>) -> Writer<'a> {
        Writer { bf }
    }

    /// Returns a copy of everything currently held in the underlying buffer.
    #[inline(always)]
    pub fn dump(&self) -> Vec<u8> {
        self.bf.clone()
    }

    /// Clears the underlying buffer (its capacity is retained by `Vec::clear`).
    #[inline(always)]
    pub fn reset(&mut self) {
        self.bf.clear();
    }

    /// Number of bytes currently in the underlying buffer.
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.bf.len()
    }

    /// Returns `true` when the underlying buffer holds no bytes.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.bf.is_empty()
    }

    /// Ensures there is room for at least `additional` more bytes.
    ///
    /// When growth is needed, the request is rounded up to the larger of
    /// `2 * additional` and the current capacity so repeated small reservations
    /// do not reallocate every time.
    #[inline(always)]
    pub fn reserve(&mut self, additional: usize) {
        let spare = self.bf.capacity() - self.bf.len();
        if spare < additional {
            // Saturate instead of overflowing on absurdly large requests.
            let grow_by = max(additional.saturating_mul(2), self.bf.capacity());
            self.bf.reserve(grow_by);
        }
    }

    /// Appends `len` zero bytes, typically as a placeholder patched later with
    /// [`Writer::set_bytes`].
    #[inline(always)]
    pub fn skip(&mut self, len: usize) {
        self.bf.resize(self.bf.len() + len, 0);
    }

    /// Overwrites already-written bytes starting at `offset` with `data`.
    ///
    /// # Panics
    ///
    /// Panics if `offset..offset + data.len()` is not fully inside the bytes
    /// written so far.
    #[inline(always)]
    pub fn set_bytes(&mut self, offset: usize, data: &[u8]) {
        let end = offset + data.len();
        self.bf[offset..end].copy_from_slice(data);
    }

    /// Appends `v` verbatim and returns the number of bytes written.
    #[inline(always)]
    pub fn write_bytes(&mut self, v: &[u8]) -> usize {
        self.bf.extend_from_slice(v);
        v.len()
    }

    /// Writes `value` as a single byte: `1` for `true`, `0` for `false`.
    #[inline(always)]
    pub fn write_bool(&mut self, value: bool) {
        self.bf.push(u8::from(value));
    }

    /// Writes a single unsigned byte.
    #[inline(always)]
    pub fn write_u8(&mut self, value: u8) {
        self.bf.push(value);
    }

    /// Writes a single signed byte (two's complement bit pattern).
    #[inline(always)]
    pub fn write_i8(&mut self, value: i8) {
        self.bf.push(value as u8);
    }

    /// Writes a `u16` as 2 little-endian bytes.
    #[inline(always)]
    pub fn write_u16(&mut self, value: u16) {
        // `to_le_bytes` is a no-op on little-endian hosts, so no unsafe cast is needed.
        self.bf.extend_from_slice(&value.to_le_bytes());
    }

    /// Writes an `i16` as 2 little-endian bytes.
    #[inline(always)]
    pub fn write_i16(&mut self, value: i16) {
        self.write_u16(value as u16);
    }

    /// Writes a `u32` as 4 little-endian bytes.
    #[inline(always)]
    pub fn write_u32(&mut self, value: u32) {
        self.bf.extend_from_slice(&value.to_le_bytes());
    }

    /// Writes an `i32` as 4 little-endian bytes.
    #[inline(always)]
    pub fn write_i32(&mut self, value: i32) {
        self.write_u32(value as u32);
    }

    /// Writes the IEEE-754 bit pattern of an `f32` as 4 little-endian bytes.
    #[inline(always)]
    pub fn write_f32(&mut self, value: f32) {
        self.bf.extend_from_slice(&value.to_le_bytes());
    }

    /// Writes an `i64` as 8 little-endian bytes.
    #[inline(always)]
    pub fn write_i64(&mut self, value: i64) {
        self.write_u64(value as u64);
    }

    /// Writes the IEEE-754 bit pattern of an `f64` as 8 little-endian bytes.
    #[inline(always)]
    pub fn write_f64(&mut self, value: f64) {
        self.bf.extend_from_slice(&value.to_le_bytes());
    }

    /// Writes a `u64` as 8 little-endian bytes.
    #[inline(always)]
    pub fn write_u64(&mut self, value: u64) {
        self.bf.extend_from_slice(&value.to_le_bytes());
    }

    /// Writes a `usize` widened to `u64` (8 little-endian bytes).
    #[inline(always)]
    pub fn write_usize(&mut self, value: usize) {
        self.write_u64(value as u64);
    }

    /// Writes a `u128` as 16 little-endian bytes.
    #[inline(always)]
    pub fn write_u128(&mut self, value: u128) {
        self.bf.extend_from_slice(&value.to_le_bytes());
    }

    /// Writes a signed 32-bit integer as a zigzag-encoded varint (1 to 5 bytes).
    #[inline(always)]
    pub fn write_varint32(&mut self, value: i32) {
        // Zigzag maps small magnitudes of either sign to small unsigned values.
        let zigzag = ((value << 1) ^ (value >> 31)) as u32;
        self._write_varuint32(zigzag)
    }

    /// Writes an unsigned 32-bit integer as a varint (1 to 5 bytes).
    #[inline(always)]
    pub fn write_varuint32(&mut self, value: u32) {
        self._write_varuint32(value)
    }

    #[inline(always)]
    fn _write_varuint32(&mut self, value: u32) {
        // At most four continuation bytes; the fifth byte carries bits 28..32.
        self.write_varuint_groups(u64::from(value), 4);
    }

    /// Writes a signed 64-bit integer as a zigzag-encoded varint (1 to 9 bytes).
    #[inline(always)]
    pub fn write_varint64(&mut self, value: i64) {
        let zigzag = ((value << 1) ^ (value >> 63)) as u64;
        self._write_varuint64(zigzag);
    }

    /// Writes an unsigned 64-bit integer as a varint (1 to 9 bytes).
    #[inline(always)]
    pub fn write_varuint64(&mut self, value: u64) {
        self._write_varuint64(value);
    }

    #[inline(always)]
    fn _write_varuint64(&mut self, value: u64) {
        // At most eight continuation bytes; the ninth byte carries bits 56..64 in full.
        self.write_varuint_groups(value, 8);
    }

    /// Writes a value below `2^36` as a varint of 1 to 5 bytes.
    ///
    /// The first four bytes carry 7 payload bits each plus a continuation flag;
    /// a fifth byte, when present, carries the remaining 8 bits (28..36) in full.
    /// Exactly as many bytes as the encoding needs are appended, so the stream
    /// stays aligned with `Reader::read_varuint36small`.
    ///
    /// # Panics
    ///
    /// Panics if `value >= 2^36`.
    #[inline(always)]
    pub fn write_varuint36_small(&mut self, value: u64) {
        assert!(value < (1u64 << 36), "value too large for 36-bit varint");
        self.write_varuint_groups(value, 4);
    }

    /// Appends the raw UTF-8 bytes of `s` (no length prefix is written).
    #[inline(always)]
    pub fn write_utf8_string(&mut self, s: &str) {
        // `extend_from_slice` reserves the exact space itself.
        self.bf.extend_from_slice(s.as_bytes());
    }

    /// Shared varint encoder: emits up to `max_continuation_bytes` bytes of
    /// 7 payload bits with the high bit set, then one final byte holding whatever
    /// is left of `value` (all 8 bits of it once the continuation budget is spent).
    ///
    /// The bytes are staged on the stack and appended with a single
    /// `extend_from_slice`, so the buffer is grown at most once per varint.
    #[inline(always)]
    fn write_varuint_groups(&mut self, value: u64, max_continuation_bytes: usize) {
        debug_assert!(max_continuation_bytes <= 8);
        let mut encoded = [0u8; 9];
        let mut rest = value;
        let mut used = 0;
        while rest >= 0x80 && used < max_continuation_bytes {
            encoded[used] = ((rest as u8) & 0x7F) | 0x80;
            rest >>= 7;
            used += 1;
        }
        // Callers guarantee `rest` fits in one byte here.
        encoded[used] = rest as u8;
        self.bf.extend_from_slice(&encoded[..=used]);
    }
}
397
398#[derive(Default)]
399#[allow(clippy::needless_lifetimes)]
400pub struct Reader<'a> {
401    pub(crate) bf: &'a [u8],
402    pub(crate) cursor: usize,
403}
404
405#[allow(clippy::needless_lifetimes)]
406impl<'a> Reader<'a> {
407    #[inline(always)]
408    pub fn new(bf: &[u8]) -> Reader<'_> {
409        Reader { bf, cursor: 0 }
410    }
411
412    #[inline(always)]
413    pub(crate) fn move_next(&mut self, additional: usize) {
414        self.cursor += additional;
415    }
416
417    #[inline(always)]
418    pub(crate) fn move_back(&mut self, additional: usize) {
419        self.cursor -= additional;
420    }
421
422    #[inline(always)]
423    pub fn sub_slice(&self, start: usize, end: usize) -> Result<&[u8], Error> {
424        if start >= self.bf.len() || end > self.bf.len() || end < start {
425            Err(Error::buffer_out_of_bound(
426                start,
427                self.bf.len(),
428                self.bf.len(),
429            ))
430        } else {
431            Ok(&self.bf[start..end])
432        }
433    }
434
435    #[inline(always)]
436    pub fn slice_after_cursor(&self) -> &[u8] {
437        &self.bf[self.cursor..]
438    }
439
440    #[inline(always)]
441    pub fn get_cursor(&self) -> usize {
442        self.cursor
443    }
444
445    #[inline(always)]
446    fn value_at(&self, index: usize) -> Result<u8, Error> {
447        match self.bf.get(index) {
448            None => Err(Error::buffer_out_of_bound(
449                index,
450                self.bf.len(),
451                self.bf.len(),
452            )),
453            Some(v) => Ok(*v),
454        }
455    }
456
457    #[inline(always)]
458    fn check_bound(&self, n: usize) -> Result<(), Error> {
459        // The upper layer guarantees it is non-null
460        // if self.bf.is_null() {
461        //     return Err(Error::invalid_data("buffer pointer is null"));
462        // }
463        if self.cursor + n > self.bf.len() {
464            Err(Error::buffer_out_of_bound(self.cursor, n, self.bf.len()))
465        } else {
466            Ok(())
467        }
468    }
469
470    #[inline(always)]
471    pub fn read_bool(&mut self) -> Result<bool, Error> {
472        Ok(self.read_u8()? != 0)
473    }
474
475    #[inline(always)]
476    fn read_u8_uncheck(&mut self) -> u8 {
477        let result = unsafe { self.bf.get_unchecked(self.cursor) };
478        self.move_next(1);
479        *result
480    }
481
482    #[inline(always)]
483    pub fn peek_u8(&mut self) -> Result<u8, Error> {
484        let result = self.value_at(self.cursor)?;
485        Ok(result)
486    }
487
488    #[inline(always)]
489    pub fn read_u8(&mut self) -> Result<u8, Error> {
490        let result = self.value_at(self.cursor)?;
491        self.move_next(1);
492        Ok(result)
493    }
494
495    #[inline(always)]
496    pub fn read_i8(&mut self) -> Result<i8, Error> {
497        Ok(self.read_u8()? as i8)
498    }
499
500    #[inline(always)]
501    pub fn read_u16(&mut self) -> Result<u16, Error> {
502        let slice = self.slice_after_cursor();
503        let result = LittleEndian::read_u16(slice);
504        self.cursor += 2;
505        Ok(result)
506    }
507
508    #[inline(always)]
509    pub fn read_i16(&mut self) -> Result<i16, Error> {
510        Ok(self.read_u16()? as i16)
511    }
512
513    #[inline(always)]
514    pub fn read_u32(&mut self) -> Result<u32, Error> {
515        let slice = self.slice_after_cursor();
516        let result = LittleEndian::read_u32(slice);
517        self.cursor += 4;
518        Ok(result)
519    }
520
521    #[inline(always)]
522    pub fn read_i32(&mut self) -> Result<i32, Error> {
523        Ok(self.read_u32()? as i32)
524    }
525
526    #[inline(always)]
527    pub fn read_u64(&mut self) -> Result<u64, Error> {
528        let slice = self.slice_after_cursor();
529        let result = LittleEndian::read_u64(slice);
530        self.cursor += 8;
531        Ok(result)
532    }
533
534    #[inline(always)]
535    pub fn read_usize(&mut self) -> Result<usize, Error> {
536        Ok(self.read_u64()? as usize)
537    }
538
539    #[inline(always)]
540    pub fn read_u128(&mut self) -> Result<u128, Error> {
541        let slice = self.slice_after_cursor();
542        let result = LittleEndian::read_u128(slice);
543        self.cursor += 16;
544        Ok(result)
545    }
546
547    #[inline(always)]
548    pub fn read_i64(&mut self) -> Result<i64, Error> {
549        Ok(self.read_u64()? as i64)
550    }
551
552    #[inline(always)]
553    pub fn read_f32(&mut self) -> Result<f32, Error> {
554        let slice = self.slice_after_cursor();
555        let result = LittleEndian::read_f32(slice);
556        self.cursor += 4;
557        Ok(result)
558    }
559
560    #[inline(always)]
561    pub fn read_f64(&mut self) -> Result<f64, Error> {
562        let slice = self.slice_after_cursor();
563        let result = LittleEndian::read_f64(slice);
564        self.cursor += 8;
565        Ok(result)
566    }
567
568    #[inline(always)]
569    pub fn read_varuint32(&mut self) -> Result<u32, Error> {
570        let b0 = self.value_at(self.cursor)? as u32;
571        if b0 < 0x80 {
572            self.move_next(1);
573            return Ok(b0);
574        }
575
576        let b1 = self.value_at(self.cursor + 1)? as u32;
577        let mut encoded = (b0 & 0x7F) | ((b1 & 0x7F) << 7);
578        if b1 < 0x80 {
579            self.move_next(2);
580            return Ok(encoded);
581        }
582
583        let b2 = self.value_at(self.cursor + 2)? as u32;
584        encoded |= (b2 & 0x7F) << 14;
585        if b2 < 0x80 {
586            self.move_next(3);
587            return Ok(encoded);
588        }
589
590        let b3 = self.value_at(self.cursor + 3)? as u32;
591        encoded |= (b3 & 0x7F) << 21;
592        if b3 < 0x80 {
593            self.move_next(4);
594            return Ok(encoded);
595        }
596
597        let b4 = self.value_at(self.cursor + 4)? as u32;
598        encoded |= b4 << 28;
599        self.move_next(5);
600        Ok(encoded)
601    }
602
603    #[inline(always)]
604    pub fn read_varint32(&mut self) -> Result<i32, Error> {
605        let encoded = self.read_varuint32()?;
606        Ok(((encoded >> 1) as i32) ^ -((encoded & 1) as i32))
607    }
608
609    #[inline(always)]
610    pub fn read_varuint64(&mut self) -> Result<u64, Error> {
611        let b0 = self.value_at(self.cursor)? as u64;
612        if b0 < 0x80 {
613            self.move_next(1);
614            return Ok(b0);
615        }
616
617        let b1 = self.value_at(self.cursor + 1)? as u64;
618        let mut var64 = (b0 & 0x7F) | ((b1 & 0x7F) << 7);
619        if b1 < 0x80 {
620            self.move_next(2);
621            return Ok(var64);
622        }
623
624        let b2 = self.value_at(self.cursor + 2)? as u64;
625        var64 |= (b2 & 0x7F) << 14;
626        if b2 < 0x80 {
627            self.move_next(3);
628            return Ok(var64);
629        }
630
631        let b3 = self.value_at(self.cursor + 3)? as u64;
632        var64 |= (b3 & 0x7F) << 21;
633        if b3 < 0x80 {
634            self.move_next(4);
635            return Ok(var64);
636        }
637
638        let b4 = self.value_at(self.cursor + 4)? as u64;
639        var64 |= (b4 & 0x7F) << 28;
640        if b4 < 0x80 {
641            self.move_next(5);
642            return Ok(var64);
643        }
644
645        let b5 = self.value_at(self.cursor + 5)? as u64;
646        var64 |= (b5 & 0x7F) << 35;
647        if b5 < 0x80 {
648            self.move_next(6);
649            return Ok(var64);
650        }
651
652        let b6 = self.value_at(self.cursor + 6)? as u64;
653        var64 |= (b6 & 0x7F) << 42;
654        if b6 < 0x80 {
655            self.move_next(7);
656            return Ok(var64);
657        }
658
659        let b7 = self.value_at(self.cursor + 7)? as u64;
660        var64 |= (b7 & 0x7F) << 49;
661        if b7 < 0x80 {
662            self.move_next(8);
663            return Ok(var64);
664        }
665
666        let b8 = self.value_at(self.cursor + 8)? as u64;
667        var64 |= (b8 & 0xFF) << 56;
668        self.move_next(9);
669        Ok(var64)
670    }
671
672    #[inline(always)]
673    pub fn read_varint64(&mut self) -> Result<i64, Error> {
674        let encoded = self.read_varuint64()?;
675        Ok(((encoded >> 1) as i64) ^ -((encoded & 1) as i64))
676    }
677
678    #[inline(always)]
679    pub fn read_latin1_string(&mut self, len: usize) -> Result<String, Error> {
680        self.check_bound(len)?;
681        if len < SIMD_THRESHOLD {
682            // Fast path for small buffers
683            unsafe {
684                let src = self.sub_slice(self.cursor, self.cursor + len)?;
685
686                // Check if all bytes are ASCII (< 0x80)
687                let is_ascii = src.iter().all(|&b| b < 0x80);
688
689                if is_ascii {
690                    // ASCII fast path: Latin1 == UTF-8, direct copy
691                    let mut vec = Vec::with_capacity(len);
692                    let dst = vec.as_mut_ptr();
693                    std::ptr::copy_nonoverlapping(src.as_ptr(), dst, len);
694                    vec.set_len(len);
695                    self.move_next(len);
696                    Ok(String::from_utf8_unchecked(vec))
697                } else {
698                    // Contains Latin1 bytes (0x80-0xFF): must convert to UTF-8
699                    let mut out: Vec<u8> = Vec::with_capacity(len * 2);
700                    let out_ptr = out.as_mut_ptr();
701                    let mut out_len = 0;
702
703                    for &b in src {
704                        if b < 0x80 {
705                            *out_ptr.add(out_len) = b;
706                            out_len += 1;
707                        } else {
708                            // Latin1 -> UTF-8 encoding
709                            *out_ptr.add(out_len) = 0xC0 | (b >> 6);
710                            *out_ptr.add(out_len + 1) = 0x80 | (b & 0x3F);
711                            out_len += 2;
712                        }
713                    }
714
715                    out.set_len(out_len);
716                    self.move_next(len);
717                    Ok(String::from_utf8_unchecked(out))
718                }
719            }
720        } else {
721            // Use SIMD for larger strings where the overhead is amortized
722            read_latin1_simd(self, len)
723        }
724    }
725
726    #[inline(always)]
727    pub fn read_utf8_string(&mut self, len: usize) -> Result<String, Error> {
728        self.check_bound(len)?;
729        // don't use simd for memory copy, copy_non_overlapping is faster
730        unsafe {
731            let mut vec = Vec::with_capacity(len);
732            let src = self.bf.as_ptr().add(self.cursor);
733            let dst = vec.as_mut_ptr();
734            // Use fastest possible copy - copy_nonoverlapping compiles to memcpy
735            std::ptr::copy_nonoverlapping(src, dst, len);
736            vec.set_len(len);
737            self.move_next(len);
738            // SAFETY: Assuming valid UTF-8 bytes (responsibility of serialization protocol)
739            Ok(String::from_utf8_unchecked(vec))
740        }
741    }
742
743    #[inline(always)]
744    pub fn read_utf16_string(&mut self, len: usize) -> Result<String, Error> {
745        self.check_bound(len)?;
746        let slice = self.sub_slice(self.cursor, self.cursor + len)?;
747        let units: Vec<u16> = slice
748            .chunks_exact(2)
749            .map(|c| u16::from_le_bytes([c[0], c[1]]))
750            .collect();
751        self.move_next(len);
752        Ok(String::from_utf16_lossy(&units))
753    }
754
755    #[inline(always)]
756    pub fn read_varuint36small(&mut self) -> Result<u64, Error> {
757        let start = self.cursor;
758        let slice = self.slice_after_cursor();
759
760        if slice.len() >= 8 {
761            // here already check bound
762            let bulk = self.read_u64()?;
763            let mut result = bulk & 0x7F;
764            let mut read_idx = start;
765
766            if (bulk & 0x80) != 0 {
767                read_idx += 1;
768                result |= (bulk >> 1) & 0x3F80;
769                if (bulk & 0x8000) != 0 {
770                    read_idx += 1;
771                    result |= (bulk >> 2) & 0x1FC000;
772                    if (bulk & 0x800000) != 0 {
773                        read_idx += 1;
774                        result |= (bulk >> 3) & 0xFE00000;
775                        if (bulk & 0x80000000) != 0 {
776                            read_idx += 1;
777                            result |= (bulk >> 4) & 0xFF0000000;
778                        }
779                    }
780                }
781            }
782            self.cursor = read_idx + 1;
783            return Ok(result);
784        }
785
786        let mut result = 0u64;
787        let mut shift = 0;
788        while self.cursor < self.bf.len() {
789            let b = self.read_u8_uncheck();
790            result |= ((b & 0x7F) as u64) << shift;
791            if (b & 0x80) == 0 {
792                break;
793            }
794            shift += 7;
795            if shift >= 36 {
796                return Err(Error::encode_error("varuint36small overflow"));
797            }
798        }
799        Ok(result)
800    }
801
802    #[inline(always)]
803    pub fn skip(&mut self, len: usize) -> Result<(), Error> {
804        self.check_bound(len)?;
805        self.move_next(len);
806        Ok(())
807    }
808
809    #[inline(always)]
810    pub fn read_bytes(&mut self, len: usize) -> Result<&[u8], Error> {
811        self.check_bound(len)?;
812        let result = &self.bf[self.cursor..self.cursor + len];
813        self.move_next(len);
814        Ok(result)
815    }
816
817    #[inline(always)]
818    pub fn reset_cursor_to_here(&self) -> impl FnOnce(&mut Self) {
819        let raw_cursor = self.cursor;
820        move |this: &mut Self| {
821            this.cursor = raw_cursor;
822        }
823    }
824
825    pub fn set_cursor(&mut self, cursor: usize) {
826        self.cursor = cursor;
827    }
828}
829
// SAFETY: `Reader` holds only a shared `&[u8]` borrow and a `usize` cursor, and
// has no interior mutability, so moving it to another thread is sound.
// NOTE(review): both field types are already `Send + Sync`, so the compiler
// would derive these auto traits anyway; the manual impls look redundant.
// Confirm before removing.
#[allow(clippy::needless_lifetimes)]
unsafe impl<'a> Send for Reader<'a> {}
// SAFETY: same reasoning as for `Send`; shared access never mutates through
// `&Reader`.
#[allow(clippy::needless_lifetimes)]
unsafe impl<'a> Sync for Reader<'a> {}