Skip to main content

fory_core/
buffer.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::error::Error;
19use crate::float16::float16;
20use crate::meta::buffer_rw_string::read_latin1_simd;
21use byteorder::{ByteOrder, LittleEndian};
22use std::cmp::max;
23
/// Minimum string length, in bytes, at which the SIMD string routines are used.
/// Shorter inputs take the plain copy path because SIMD setup costs more than it saves.
const SIMD_THRESHOLD: usize = 128;
27
/// Append-only binary writer over a caller-owned byte vector.
///
/// Multi-byte values are emitted in little-endian byte order.
pub struct Writer<'a> {
    /// Destination buffer; the `write_*` methods append to its end.
    pub(crate) bf: &'a mut Vec<u8>,
}
31impl<'a> Writer<'a> {
32    // ============ Utility methods ============
33
34    #[inline(always)]
35    pub fn from_buffer(bf: &'a mut Vec<u8>) -> Writer<'a> {
36        Writer { bf }
37    }
38
39    #[inline(always)]
40    pub fn dump(&self) -> Vec<u8> {
41        self.bf.clone()
42    }
43
44    #[inline(always)]
45    pub fn reset(&mut self) {
46        self.bf.clear();
47    }
48
49    #[inline(always)]
50    pub fn len(&self) -> usize {
51        self.bf.len()
52    }
53
54    #[inline(always)]
55    pub fn is_empty(&self) -> bool {
56        self.bf.is_empty()
57    }
58
59    #[inline(always)]
60    pub fn reserve(&mut self, additional: usize) {
61        if self.bf.capacity() - self.len() < additional {
62            self.bf.reserve(max(additional * 2, self.bf.capacity()));
63        }
64    }
65
66    #[inline(always)]
67    pub fn skip(&mut self, len: usize) {
68        self.bf.resize(self.bf.len() + len, 0);
69    }
70
71    #[inline(always)]
72    pub fn set_bytes(&mut self, offset: usize, data: &[u8]) {
73        self.bf
74            .get_mut(offset..offset + data.len())
75            .unwrap()
76            .copy_from_slice(data);
77    }
78
79    #[inline(always)]
80    pub fn write_bytes(&mut self, v: &[u8]) -> usize {
81        self.bf.extend_from_slice(v);
82        v.len()
83    }
84
85    // ============ BOOL (TypeId = 1) ============
86
87    #[inline(always)]
88    pub fn write_bool(&mut self, value: bool) {
89        self.bf.push(if value { 1 } else { 0 });
90    }
91
92    // ============ INT8 (TypeId = 2) ============
93
94    #[inline(always)]
95    pub fn write_i8(&mut self, value: i8) {
96        self.bf.push(value as u8);
97    }
98
99    // ============ INT16 (TypeId = 3) ============
100
101    #[inline(always)]
102    pub fn write_i16(&mut self, value: i16) {
103        self.write_u16(value as u16);
104    }
105
106    // ============ INT32 (TypeId = 4) ============
107
108    #[inline(always)]
109    pub fn write_i32(&mut self, value: i32) {
110        self.write_u32(value as u32);
111    }
112
113    // ============ VARINT32 (TypeId = 5) ============
114
115    #[inline(always)]
116    pub fn write_varint32(&mut self, value: i32) {
117        let zigzag = ((value as i64) << 1) ^ ((value as i64) >> 31);
118        self._write_var_uint32(zigzag as u32)
119    }
120
121    // ============ INT64 (TypeId = 6) ============
122
123    #[inline(always)]
124    pub fn write_i64(&mut self, value: i64) {
125        self.write_u64(value as u64);
126    }
127
128    // ============ VARINT64 (TypeId = 7) ============
129
130    #[inline(always)]
131    pub fn write_varint64(&mut self, value: i64) {
132        let zigzag = ((value << 1) ^ (value >> 63)) as u64;
133        self._write_var_uint64(zigzag);
134    }
135
136    // ============ TAGGED_INT64 (TypeId = 8) ============
137
138    /// Write signed long using fory Tagged(Small long as int) encoding.
139    /// If value is in [0xc0000000, 0x3fffffff] (i.e., [-1073741824, 1073741823]),
140    /// encode as 4 bytes: `((value as i32) << 1)`.
141    /// Otherwise write as 9 bytes: `0b1 | little-endian 8 bytes i64`.
142    #[inline(always)]
143    pub fn write_tagged_i64(&mut self, value: i64) {
144        const HALF_MIN_INT_VALUE: i64 = i32::MIN as i64 / 2; // -1073741824
145        const HALF_MAX_INT_VALUE: i64 = i32::MAX as i64 / 2; // 1073741823
146        if (HALF_MIN_INT_VALUE..=HALF_MAX_INT_VALUE).contains(&value) {
147            // Fits in 31 bits (with sign), encode as 4 bytes with bit 0 = 0
148            let v = (value as i32) << 1;
149            self.write_i32(v);
150        } else {
151            // Write flag byte (0b1) followed by 8-byte i64
152            self.bf.push(0b1);
153            self.write_i64(value);
154        }
155    }
156
157    // ============ UINT8 (TypeId = 9) ============
158
159    #[inline(always)]
160    pub fn write_u8(&mut self, value: u8) {
161        self.bf.push(value);
162    }
163
164    // ============ UINT16 (TypeId = 10) ============
165
166    #[inline(always)]
167    pub fn write_u16(&mut self, value: u16) {
168        #[cfg(target_endian = "little")]
169        {
170            let bytes = unsafe { &*(&value as *const u16 as *const [u8; 2]) };
171            self.bf.extend_from_slice(bytes);
172        }
173        #[cfg(target_endian = "big")]
174        {
175            self.bf.extend_from_slice(&value.to_le_bytes());
176        }
177    }
178
179    // ============ UINT32 (TypeId = 11) ============
180
181    #[inline(always)]
182    pub fn write_u32(&mut self, value: u32) {
183        #[cfg(target_endian = "little")]
184        {
185            let bytes = unsafe { &*(&value as *const u32 as *const [u8; 4]) };
186            self.bf.extend_from_slice(bytes);
187        }
188        #[cfg(target_endian = "big")]
189        {
190            self.bf.extend_from_slice(&value.to_le_bytes());
191        }
192    }
193
194    // ============ VAR_UINT32 (TypeId = 12) ============
195
196    #[inline(always)]
197    pub fn write_var_uint32(&mut self, value: u32) {
198        self._write_var_uint32(value)
199    }
200
201    #[inline(always)]
202    fn _write_var_uint32(&mut self, value: u32) {
203        if value < 0x80 {
204            self.bf.push(value as u8);
205        } else if value < 0x4000 {
206            // 2 bytes
207            let u1 = ((value as u8) & 0x7F) | 0x80;
208            let u2 = (value >> 7) as u8;
209            self.write_u16(((u2 as u16) << 8) | u1 as u16);
210        } else if value < 0x200000 {
211            // 3 bytes
212            let u1 = ((value as u8) & 0x7F) | 0x80;
213            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
214            let u3 = (value >> 14) as u8;
215            self.write_u16(((u2 as u16) << 8) | u1 as u16);
216            self.bf.push(u3);
217        } else if value < 0x10000000 {
218            // 4 bytes
219            let u1 = ((value as u8) & 0x7F) | 0x80;
220            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
221            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
222            let u4 = (value >> 21) as u8;
223            self.write_u32(
224                ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
225            );
226        } else {
227            // 5 bytes
228            let u1 = ((value as u8) & 0x7F) | 0x80;
229            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
230            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
231            let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
232            let u5 = (value >> 28) as u8;
233            self.write_u32(
234                ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
235            );
236            self.bf.push(u5);
237        }
238    }
239
240    // ============ UINT64 (TypeId = 13) ============
241
242    #[inline(always)]
243    pub fn write_u64(&mut self, value: u64) {
244        #[cfg(target_endian = "little")]
245        {
246            let bytes = unsafe { &*(&value as *const u64 as *const [u8; 8]) };
247            self.bf.extend_from_slice(bytes);
248        }
249        #[cfg(target_endian = "big")]
250        {
251            self.bf.extend_from_slice(&value.to_le_bytes());
252        }
253    }
254
255    // ============ VAR_UINT64 (TypeId = 14) ============
256
257    #[inline(always)]
258    pub fn write_var_uint64(&mut self, value: u64) {
259        self._write_var_uint64(value);
260    }
261
262    #[inline(always)]
263    fn _write_var_uint64(&mut self, value: u64) {
264        if value < 0x80 {
265            self.bf.push(value as u8);
266        } else if value < 0x4000 {
267            let u1 = ((value as u8) & 0x7F) | 0x80;
268            let u2 = (value >> 7) as u8;
269            self.write_u16(((u2 as u16) << 8) | u1 as u16);
270        } else if value < 0x200000 {
271            let u1 = ((value as u8) & 0x7F) | 0x80;
272            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
273            let u3 = (value >> 14) as u8;
274            self.write_u16(((u2 as u16) << 8) | u1 as u16);
275            self.bf.push(u3);
276        } else if value < 0x10000000 {
277            let u1 = ((value as u8) & 0x7F) | 0x80;
278            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
279            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
280            let u4 = (value >> 21) as u8;
281            self.write_u32(
282                ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
283            );
284        } else if value < 0x800000000 {
285            let u1 = ((value as u8) & 0x7F) | 0x80;
286            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
287            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
288            let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
289            let u5 = (value >> 28) as u8;
290            self.write_u32(
291                ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
292            );
293            self.bf.push(u5);
294        } else if value < 0x40000000000 {
295            let u1 = ((value as u8) & 0x7F) | 0x80;
296            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
297            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
298            let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
299            let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
300            let u6 = (value >> 35) as u8;
301            self.write_u32(
302                ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
303            );
304            self.write_u16(((u6 as u16) << 8) | u5 as u16);
305        } else if value < 0x2000000000000 {
306            let u1 = ((value as u8) & 0x7F) | 0x80;
307            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
308            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
309            let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
310            let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
311            let u6 = (((value >> 35) as u8) & 0x7F) | 0x80;
312            let u7 = (value >> 42) as u8;
313            self.write_u32(
314                ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
315            );
316            self.write_u16(((u6 as u16) << 8) | u5 as u16);
317            self.bf.push(u7);
318        } else if value < 0x100000000000000 {
319            let u1 = ((value as u8) & 0x7F) | 0x80;
320            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
321            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
322            let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
323            let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
324            let u6 = (((value >> 35) as u8) & 0x7F) | 0x80;
325            let u7 = (((value >> 42) as u8) & 0x7F) | 0x80;
326            let u8 = (value >> 49) as u8;
327            self.write_u64(
328                (u8 as u64) << 56
329                    | (u7 as u64) << 48
330                    | (u6 as u64) << 40
331                    | (u5 as u64) << 32
332                    | (u4 as u64) << 24
333                    | (u3 as u64) << 16
334                    | (u2 as u64) << 8
335                    | (u1 as u64),
336            );
337        } else {
338            let u1 = ((value as u8) & 0x7F) | 0x80;
339            let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
340            let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
341            let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
342            let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
343            let u6 = (((value >> 35) as u8) & 0x7F) | 0x80;
344            let u7 = (((value >> 42) as u8) & 0x7F) | 0x80;
345            let u8 = (((value >> 49) as u8) & 0x7F) | 0x80;
346            let u9 = (value >> 56) as u8;
347            self.write_u64(
348                (u8 as u64) << 56
349                    | (u7 as u64) << 48
350                    | (u6 as u64) << 40
351                    | (u5 as u64) << 32
352                    | (u4 as u64) << 24
353                    | (u3 as u64) << 16
354                    | (u2 as u64) << 8
355                    | (u1 as u64),
356            );
357            self.bf.push(u9);
358        }
359    }
360
361    // ============ TAGGED_UINT64 (TypeId = 15) ============
362
363    /// Write unsigned long using fory Tagged(Small long as int) encoding.
364    /// If value is in [0, 0x7fffffff], encode as 4 bytes: `((value as u32) << 1)`.
365    /// Otherwise write as 9 bytes: `0b1 | little-endian 8 bytes u64`.
366    #[inline(always)]
367    pub fn write_tagged_u64(&mut self, value: u64) {
368        if value <= i32::MAX as u64 {
369            // Fits in 31 bits, encode as 4 bytes with bit 0 = 0
370            let v = (value as u32) << 1;
371            self.write_u32(v);
372        } else {
373            // Write flag byte (0b1) followed by 8-byte u64
374            self.bf.push(0b1);
375            self.write_u64(value);
376        }
377    }
378
379    // ============ FLOAT32 (TypeId = 17) ============
380
381    #[inline(always)]
382    pub fn write_f32(&mut self, value: f32) {
383        #[cfg(target_endian = "little")]
384        {
385            let bytes = unsafe { &*(&value as *const f32 as *const [u8; 4]) };
386            self.bf.extend_from_slice(bytes);
387        }
388        #[cfg(target_endian = "big")]
389        {
390            self.bf.extend_from_slice(&value.to_bits().to_le_bytes());
391        }
392    }
393
394    // ============ FLOAT16 (TypeId = 16) ============
395    #[inline(always)]
396    pub fn write_f16(&mut self, value: float16) {
397        self.write_u16(value.to_bits());
398    }
399
400    // ============ FLOAT64 (TypeId = 18) ============
401
402    #[inline(always)]
403    pub fn write_f64(&mut self, value: f64) {
404        #[cfg(target_endian = "little")]
405        {
406            let bytes = unsafe { &*(&value as *const f64 as *const [u8; 8]) };
407            self.bf.extend_from_slice(bytes);
408        }
409        #[cfg(target_endian = "big")]
410        {
411            self.bf.extend_from_slice(&value.to_bits().to_le_bytes());
412        }
413    }
414
415    // ============ STRING (TypeId = 19) ============
416
417    #[inline(always)]
418    pub fn write_utf8_string(&mut self, s: &str) {
419        let bytes = s.as_bytes();
420        let len = bytes.len();
421        self.bf.reserve(len);
422        self.bf.extend_from_slice(bytes);
423    }
424
425    // ============ Rust-specific types (i128, u128, isize, usize) ============
426
427    #[inline(always)]
428    pub fn write_i128(&mut self, value: i128) {
429        self.write_u128(value as u128);
430    }
431
432    #[inline(always)]
433    pub fn write_u128(&mut self, value: u128) {
434        #[cfg(target_endian = "little")]
435        {
436            let bytes = unsafe { &*(&value as *const u128 as *const [u8; 16]) };
437            self.bf.extend_from_slice(bytes);
438        }
439        #[cfg(target_endian = "big")]
440        {
441            self.bf.extend_from_slice(&value.to_le_bytes());
442        }
443    }
444
445    #[inline(always)]
446    pub fn write_isize(&mut self, value: isize) {
447        const SIZE: usize = std::mem::size_of::<isize>();
448        match SIZE {
449            2 => self.write_i16(value as i16),
450            4 => self.write_varint32(value as i32),
451            8 => self.write_varint64(value as i64),
452            _ => unreachable!("unsupported isize size"),
453        }
454    }
455
456    #[inline(always)]
457    pub fn write_usize(&mut self, value: usize) {
458        const SIZE: usize = std::mem::size_of::<usize>();
459        match SIZE {
460            2 => self.write_u16(value as u16),
461            4 => self.write_var_uint32(value as u32),
462            8 => self.write_var_uint64(value as u64),
463            _ => unreachable!("unsupported usize size"),
464        }
465    }
466
467    // ============ Other helper methods ============
468
469    #[inline(always)]
470    pub fn write_var_uint36_small(&mut self, value: u64) {
471        assert!(value < (1u64 << 36), "value too large for 36-bit varint");
472        if value < 0x80 {
473            self.bf.push(value as u8);
474        } else if value < 0x4000 {
475            let b0 = ((value & 0x7F) as u8) | 0x80;
476            let b1 = (value >> 7) as u8;
477            let combined = ((b1 as u16) << 8) | (b0 as u16);
478            self.write_u16(combined);
479        } else if value < 0x200000 {
480            let b0 = (value & 0x7F) | 0x80;
481            let b1 = ((value >> 7) & 0x7F) | 0x80;
482            let b2 = value >> 14;
483            let combined = b0 | (b1 << 8) | (b2 << 16);
484            self.write_u32(combined as u32);
485        } else if value < 0x10000000 {
486            let b0 = (value & 0x7F) | 0x80;
487            let b1 = ((value >> 7) & 0x7F) | 0x80;
488            let b2 = ((value >> 14) & 0x7F) | 0x80;
489            let b3 = value >> 21;
490            let combined = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
491            self.write_u32(combined as u32);
492        } else {
493            let b0 = (value & 0x7F) | 0x80;
494            let b1 = ((value >> 7) & 0x7F) | 0x80;
495            let b2 = ((value >> 14) & 0x7F) | 0x80;
496            let b3 = ((value >> 21) & 0x7F) | 0x80;
497            let b4 = value >> 28;
498            let combined = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24) | (b4 << 32);
499            self.write_u64(combined);
500        }
501    }
502}
503
/// Cursor-based reader over a borrowed byte slice.
///
/// The default value reads from an empty slice with the cursor at 0.
#[derive(Default)]
#[allow(clippy::needless_lifetimes)]
pub struct Reader<'a> {
    /// The bytes being decoded.
    pub(crate) bf: &'a [u8],
    /// Index into `bf` of the next byte to read.
    pub(crate) cursor: usize,
}
510
511#[allow(clippy::needless_lifetimes)]
512impl<'a> Reader<'a> {
513    // ============ Utility methods ============
514
515    #[inline(always)]
516    pub fn new(bf: &[u8]) -> Reader<'_> {
517        Reader { bf, cursor: 0 }
518    }
519
520    #[inline(always)]
521    pub(crate) fn move_next(&mut self, additional: usize) {
522        self.cursor += additional;
523    }
524
525    #[inline(always)]
526    pub(crate) fn move_back(&mut self, additional: usize) {
527        self.cursor -= additional;
528    }
529
530    #[inline(always)]
531    pub fn sub_slice(&self, start: usize, end: usize) -> Result<&[u8], Error> {
532        // Allow start == bf.len() when end == bf.len() to support empty slices at buffer end
533        if start > self.bf.len() || end > self.bf.len() || end < start {
534            Err(Error::buffer_out_of_bound(
535                start,
536                self.bf.len(),
537                self.bf.len(),
538            ))
539        } else {
540            Ok(&self.bf[start..end])
541        }
542    }
543
544    #[inline(always)]
545    pub fn slice_after_cursor(&self) -> &[u8] {
546        &self.bf[self.cursor..]
547    }
548
549    #[inline(always)]
550    pub fn get_cursor(&self) -> usize {
551        self.cursor
552    }
553
554    #[inline(always)]
555    fn value_at(&self, index: usize) -> Result<u8, Error> {
556        match self.bf.get(index) {
557            None => Err(Error::buffer_out_of_bound(
558                index,
559                self.bf.len(),
560                self.bf.len(),
561            )),
562            Some(v) => Ok(*v),
563        }
564    }
565
566    #[inline(always)]
567    fn check_bound(&self, n: usize) -> Result<(), Error> {
568        let end = self
569            .cursor
570            .checked_add(n)
571            .ok_or_else(|| Error::buffer_out_of_bound(self.cursor, n, self.bf.len()))?;
572        if end > self.bf.len() {
573            Err(Error::buffer_out_of_bound(self.cursor, n, self.bf.len()))
574        } else {
575            Ok(())
576        }
577    }
578
579    #[inline(always)]
580    fn read_u8_uncheck(&mut self) -> u8 {
581        let result = unsafe { self.bf.get_unchecked(self.cursor) };
582        self.move_next(1);
583        *result
584    }
585
586    #[inline(always)]
587    pub fn skip(&mut self, len: usize) -> Result<(), Error> {
588        self.check_bound(len)?;
589        self.move_next(len);
590        Ok(())
591    }
592
593    #[inline(always)]
594    pub fn read_bytes(&mut self, len: usize) -> Result<&[u8], Error> {
595        self.check_bound(len)?;
596        let result = &self.bf[self.cursor..self.cursor + len];
597        self.move_next(len);
598        Ok(result)
599    }
600
601    #[inline(always)]
602    pub fn reset_cursor_to_here(&self) -> impl FnOnce(&mut Self) {
603        let raw_cursor = self.cursor;
604        move |this: &mut Self| {
605            this.cursor = raw_cursor;
606        }
607    }
608
609    pub fn set_cursor(&mut self, cursor: usize) {
610        self.cursor = cursor;
611    }
612
613    // ============ BOOL (TypeId = 1) ============
614
615    #[inline(always)]
616    pub fn read_bool(&mut self) -> Result<bool, Error> {
617        Ok(self.read_u8()? != 0)
618    }
619
620    // ============ INT8 (TypeId = 2) ============
621
622    #[inline(always)]
623    pub fn read_i8(&mut self) -> Result<i8, Error> {
624        Ok(self.read_u8()? as i8)
625    }
626
627    // ============ INT16 (TypeId = 3) ============
628
629    #[inline(always)]
630    pub fn read_i16(&mut self) -> Result<i16, Error> {
631        Ok(self.read_u16()? as i16)
632    }
633
634    // ============ INT32 (TypeId = 4) ============
635
636    #[inline(always)]
637    pub fn read_i32(&mut self) -> Result<i32, Error> {
638        Ok(self.read_u32()? as i32)
639    }
640
641    // ============ VARINT32 (TypeId = 5) ============
642
643    #[inline(always)]
644    pub fn read_varint32(&mut self) -> Result<i32, Error> {
645        let encoded = self.read_varuint32()?;
646        Ok(((encoded >> 1) as i32) ^ -((encoded & 1) as i32))
647    }
648
649    // ============ INT64 (TypeId = 6) ============
650
651    #[inline(always)]
652    pub fn read_i64(&mut self) -> Result<i64, Error> {
653        Ok(self.read_u64()? as i64)
654    }
655
656    // ============ VARINT64 (TypeId = 7) ============
657
658    #[inline(always)]
659    pub fn read_varint64(&mut self) -> Result<i64, Error> {
660        let encoded = self.read_varuint64()?;
661        Ok(((encoded >> 1) as i64) ^ -((encoded & 1) as i64))
662    }
663
664    // ============ TAGGED_INT64 (TypeId = 8) ============
665
666    /// Read signed fory Tagged(Small long as int) encoded i64.
667    /// If bit 0 of the first 4 bytes is 0, return the value >> 1 (arithmetic shift).
668    /// Otherwise, skip the flag byte and read 8 bytes as i64.
669    #[inline(always)]
670    pub fn read_tagged_i64(&mut self) -> Result<i64, Error> {
671        self.check_bound(4)?;
672        let i = LittleEndian::read_i32(&self.bf[self.cursor..]);
673        if (i & 0b1) != 0b1 {
674            // Bit 0 is 0, small value encoded in 4 bytes
675            self.cursor += 4;
676            Ok((i >> 1) as i64) // arithmetic right shift preserves sign
677        } else {
678            // Bit 0 is 1, big value: skip flag byte and read 8 bytes
679            self.check_bound(9)?;
680            self.cursor += 1;
681            let value = LittleEndian::read_i64(&self.bf[self.cursor..]);
682            self.cursor += 8;
683            Ok(value)
684        }
685    }
686
687    // ============ UINT8 (TypeId = 9) ============
688
689    #[inline(always)]
690    pub fn peek_u8(&mut self) -> Result<u8, Error> {
691        let result = self.value_at(self.cursor)?;
692        Ok(result)
693    }
694
695    #[inline(always)]
696    pub fn read_u8(&mut self) -> Result<u8, Error> {
697        let result = self.value_at(self.cursor)?;
698        self.move_next(1);
699        Ok(result)
700    }
701
702    // ============ UINT16 (TypeId = 10) ============
703
704    #[inline(always)]
705    pub fn read_u16(&mut self) -> Result<u16, Error> {
706        self.check_bound(2)?;
707        let result = LittleEndian::read_u16(&self.bf[self.cursor..self.cursor + 2]);
708        self.cursor += 2;
709        Ok(result)
710    }
711
712    // ============ UINT32 (TypeId = 11) ============
713
714    #[inline(always)]
715    pub fn read_u32(&mut self) -> Result<u32, Error> {
716        self.check_bound(4)?;
717        let result = LittleEndian::read_u32(&self.bf[self.cursor..self.cursor + 4]);
718        self.cursor += 4;
719        Ok(result)
720    }
721
722    // ============ VAR_UINT32 (TypeId = 12) ============
723
724    #[inline(always)]
725    pub fn read_varuint32(&mut self) -> Result<u32, Error> {
726        let b0 = self.value_at(self.cursor)? as u32;
727        if b0 < 0x80 {
728            self.move_next(1);
729            return Ok(b0);
730        }
731
732        let b1 = self.value_at(self.cursor + 1)? as u32;
733        let mut encoded = (b0 & 0x7F) | ((b1 & 0x7F) << 7);
734        if b1 < 0x80 {
735            self.move_next(2);
736            return Ok(encoded);
737        }
738
739        let b2 = self.value_at(self.cursor + 2)? as u32;
740        encoded |= (b2 & 0x7F) << 14;
741        if b2 < 0x80 {
742            self.move_next(3);
743            return Ok(encoded);
744        }
745
746        let b3 = self.value_at(self.cursor + 3)? as u32;
747        encoded |= (b3 & 0x7F) << 21;
748        if b3 < 0x80 {
749            self.move_next(4);
750            return Ok(encoded);
751        }
752
753        let b4 = self.value_at(self.cursor + 4)? as u32;
754        encoded |= b4 << 28;
755        self.move_next(5);
756        Ok(encoded)
757    }
758
759    // ============ UINT64 (TypeId = 13) ============
760
761    #[inline(always)]
762    pub fn read_u64(&mut self) -> Result<u64, Error> {
763        self.check_bound(8)?;
764        let result = LittleEndian::read_u64(&self.bf[self.cursor..self.cursor + 8]);
765        self.cursor += 8;
766        Ok(result)
767    }
768
769    // ============ VAR_UINT64 (TypeId = 14) ============
770
771    #[inline(always)]
772    pub fn read_varuint64(&mut self) -> Result<u64, Error> {
773        let b0 = self.value_at(self.cursor)? as u64;
774        if b0 < 0x80 {
775            self.move_next(1);
776            return Ok(b0);
777        }
778
779        let b1 = self.value_at(self.cursor + 1)? as u64;
780        let mut result = (b0 & 0x7F) | ((b1 & 0x7F) << 7);
781        if b1 < 0x80 {
782            self.move_next(2);
783            return Ok(result);
784        }
785
786        let b2 = self.value_at(self.cursor + 2)? as u64;
787        result |= (b2 & 0x7F) << 14;
788        if b2 < 0x80 {
789            self.move_next(3);
790            return Ok(result);
791        }
792
793        let b3 = self.value_at(self.cursor + 3)? as u64;
794        result |= (b3 & 0x7F) << 21;
795        if b3 < 0x80 {
796            self.move_next(4);
797            return Ok(result);
798        }
799
800        let b4 = self.value_at(self.cursor + 4)? as u64;
801        result |= (b4 & 0x7F) << 28;
802        if b4 < 0x80 {
803            self.move_next(5);
804            return Ok(result);
805        }
806
807        let b5 = self.value_at(self.cursor + 5)? as u64;
808        result |= (b5 & 0x7F) << 35;
809        if b5 < 0x80 {
810            self.move_next(6);
811            return Ok(result);
812        }
813
814        let b6 = self.value_at(self.cursor + 6)? as u64;
815        result |= (b6 & 0x7F) << 42;
816        if b6 < 0x80 {
817            self.move_next(7);
818            return Ok(result);
819        }
820
821        let b7 = self.value_at(self.cursor + 7)? as u64;
822        result |= (b7 & 0x7F) << 49;
823        if b7 < 0x80 {
824            self.move_next(8);
825            return Ok(result);
826        }
827
828        let b8 = self.value_at(self.cursor + 8)? as u64;
829        result |= (b8 & 0xFF) << 56;
830        self.move_next(9);
831        Ok(result)
832    }
833
834    // ============ TAGGED_UINT64 (TypeId = 15) ============
835
836    /// Read unsigned fory Tagged(Small long as int) encoded u64.
837    /// If bit 0 of the first 4 bytes is 0, return the value >> 1.
838    /// Otherwise, skip the flag byte and read 8 bytes as u64.
839    #[inline(always)]
840    pub fn read_tagged_u64(&mut self) -> Result<u64, Error> {
841        self.check_bound(4)?;
842        let i = LittleEndian::read_u32(&self.bf[self.cursor..]);
843        if (i & 0b1) != 0b1 {
844            // Bit 0 is 0, small value encoded in 4 bytes
845            self.cursor += 4;
846            Ok((i >> 1) as u64)
847        } else {
848            // Bit 0 is 1, big value: skip flag byte and read 8 bytes
849            self.check_bound(9)?;
850            self.cursor += 1;
851            let value = LittleEndian::read_u64(&self.bf[self.cursor..]);
852            self.cursor += 8;
853            Ok(value)
854        }
855    }
856
857    // ============ FLOAT32 (TypeId = 17) ============
858
859    #[inline(always)]
860    pub fn read_f32(&mut self) -> Result<f32, Error> {
861        self.check_bound(4)?;
862        let result = LittleEndian::read_f32(&self.bf[self.cursor..self.cursor + 4]);
863        self.cursor += 4;
864        Ok(result)
865    }
866
// ============ FLOAT16 (TypeId = 16) ============
868    #[inline(always)]
869    pub fn read_f16(&mut self) -> Result<float16, Error> {
870        self.check_bound(2)?;
871        let bits = LittleEndian::read_u16(&self.bf[self.cursor..self.cursor + 2]);
872        self.cursor += 2;
873        Ok(float16::from_bits(bits))
874    }
875
876    pub fn read_f64(&mut self) -> Result<f64, Error> {
877        self.check_bound(8)?;
878        let result = LittleEndian::read_f64(&self.bf[self.cursor..self.cursor + 8]);
879        self.cursor += 8;
880        Ok(result)
881    }
882
883    // ============ STRING (TypeId = 19) ============
884
885    #[inline(always)]
886    pub fn read_latin1_string(&mut self, len: usize) -> Result<String, Error> {
887        self.check_bound(len)?;
888        if len < SIMD_THRESHOLD {
889            // Fast path for small buffers
890            unsafe {
891                let src = self.sub_slice(self.cursor, self.cursor + len)?;
892
893                // Check if all bytes are ASCII (< 0x80)
894                let is_ascii = src.iter().all(|&b| b < 0x80);
895
896                if is_ascii {
897                    // ASCII fast path: Latin1 == UTF-8, direct copy
898                    let mut vec = Vec::with_capacity(len);
899                    let dst = vec.as_mut_ptr();
900                    std::ptr::copy_nonoverlapping(src.as_ptr(), dst, len);
901                    vec.set_len(len);
902                    self.move_next(len);
903                    Ok(String::from_utf8_unchecked(vec))
904                } else {
905                    // Contains Latin1 bytes (0x80-0xFF): must convert to UTF-8
906                    let mut out: Vec<u8> = Vec::with_capacity(len * 2);
907                    let out_ptr = out.as_mut_ptr();
908                    let mut out_len = 0;
909
910                    for &b in src {
911                        if b < 0x80 {
912                            *out_ptr.add(out_len) = b;
913                            out_len += 1;
914                        } else {
915                            // Latin1 -> UTF-8 encoding
916                            *out_ptr.add(out_len) = 0xC0 | (b >> 6);
917                            *out_ptr.add(out_len + 1) = 0x80 | (b & 0x3F);
918                            out_len += 2;
919                        }
920                    }
921
922                    out.set_len(out_len);
923                    self.move_next(len);
924                    Ok(String::from_utf8_unchecked(out))
925                }
926            }
927        } else {
928            // Use SIMD for larger strings where the overhead is amortized
929            read_latin1_simd(self, len)
930        }
931    }
932
933    #[inline(always)]
934    pub fn read_utf8_string(&mut self, len: usize) -> Result<String, Error> {
935        self.check_bound(len)?;
936        // don't use simd for memory copy, copy_non_overlapping is faster
937        unsafe {
938            let mut vec = Vec::with_capacity(len);
939            let src = self.bf.as_ptr().add(self.cursor);
940            let dst = vec.as_mut_ptr();
941            // Use fastest possible copy - copy_nonoverlapping compiles to memcpy
942            std::ptr::copy_nonoverlapping(src, dst, len);
943            vec.set_len(len);
944            self.move_next(len);
945            // SAFETY: Assuming valid UTF-8 bytes (responsibility of serialization protocol)
946            Ok(String::from_utf8_unchecked(vec))
947        }
948    }
949
950    #[inline(always)]
951    pub fn read_utf16_string(&mut self, len: usize) -> Result<String, Error> {
952        self.check_bound(len)?;
953        let slice = self.sub_slice(self.cursor, self.cursor + len)?;
954        let units: Vec<u16> = slice
955            .chunks_exact(2)
956            .map(|c| u16::from_le_bytes([c[0], c[1]]))
957            .collect();
958        self.move_next(len);
959        Ok(String::from_utf16_lossy(&units))
960    }
961
962    // ============ Rust-specific types (i128, u128, isize, usize) ============
963
964    #[inline(always)]
965    pub fn read_i128(&mut self) -> Result<i128, Error> {
966        Ok(self.read_u128()? as i128)
967    }
968
969    #[inline(always)]
970    pub fn read_u128(&mut self) -> Result<u128, Error> {
971        self.check_bound(16)?;
972        let result = LittleEndian::read_u128(&self.bf[self.cursor..self.cursor + 16]);
973        self.cursor += 16;
974        Ok(result)
975    }
976
977    #[inline(always)]
978    pub fn read_isize(&mut self) -> Result<isize, Error> {
979        const SIZE: usize = std::mem::size_of::<isize>();
980        match SIZE {
981            2 => Ok(self.read_i16()? as isize),
982            4 => Ok(self.read_varint32()? as isize),
983            8 => Ok(self.read_varint64()? as isize),
984            _ => unreachable!("unsupported isize size"),
985        }
986    }
987
988    #[inline(always)]
989    pub fn read_usize(&mut self) -> Result<usize, Error> {
990        const SIZE: usize = std::mem::size_of::<usize>();
991        match SIZE {
992            2 => Ok(self.read_u16()? as usize),
993            4 => Ok(self.read_varuint32()? as usize),
994            8 => Ok(self.read_varuint64()? as usize),
995            _ => unreachable!("unsupported usize size"),
996        }
997    }
998
999    // ============ Other helper methods ============
1000
1001    #[inline(always)]
1002    pub fn read_varuint36small(&mut self) -> Result<u64, Error> {
1003        // Keep this API panic-free even if cursor is externally set past buffer end.
1004        self.check_bound(0)?;
1005        let start = self.cursor;
1006        let slice = self.slice_after_cursor();
1007
1008        if slice.len() >= 8 {
1009            // here already check bound
1010            let bulk = self.read_u64()?;
1011            let mut result = bulk & 0x7F;
1012            let mut read_idx = start;
1013
1014            if (bulk & 0x80) != 0 {
1015                read_idx += 1;
1016                result |= (bulk >> 1) & 0x3F80;
1017                if (bulk & 0x8000) != 0 {
1018                    read_idx += 1;
1019                    result |= (bulk >> 2) & 0x1FC000;
1020                    if (bulk & 0x800000) != 0 {
1021                        read_idx += 1;
1022                        result |= (bulk >> 3) & 0xFE00000;
1023                        if (bulk & 0x80000000) != 0 {
1024                            read_idx += 1;
1025                            result |= (bulk >> 4) & 0xFF0000000;
1026                        }
1027                    }
1028                }
1029            }
1030            self.cursor = read_idx + 1;
1031            return Ok(result);
1032        }
1033
1034        let mut result = 0u64;
1035        let mut shift = 0;
1036        while self.cursor < self.bf.len() {
1037            let b = self.read_u8_uncheck();
1038            result |= ((b & 0x7F) as u64) << shift;
1039            if (b & 0x80) == 0 {
1040                break;
1041            }
1042            shift += 7;
1043            if shift >= 36 {
1044                return Err(Error::encode_error("varuint36small overflow"));
1045            }
1046        }
1047        Ok(result)
1048    }
1049}
1050
// Manually assert that `Reader` may be moved to and shared between threads.
// NOTE(review): no SAFETY justification accompanies these `unsafe impl`s and
// the `Reader` definition is not visible from here. Confirm that every field
// is safe to send/share (no unsynchronized interior mutability, no raw
// pointers that outlive their buffer), and record that reasoning here.
// The `allow` keeps the explicit `'a` in the impl header, which clippy's
// `needless_lifetimes` lint would otherwise flag.
#[allow(clippy::needless_lifetimes)]
unsafe impl<'a> Send for Reader<'a> {}
#[allow(clippy::needless_lifetimes)]
unsafe impl<'a> Sync for Reader<'a> {}