1use crate::error::Error;
19use crate::types::bfloat16::bfloat16;
20use crate::types::float16::float16;
21use crate::util::buffer_rw_string::read_latin1_simd;
22use byteorder::{ByteOrder, LittleEndian};
23use std::cmp::max;
24
/// Byte-length cutoff used by `Reader::read_latin1_string`: strings shorter than
/// this are decoded inline, everything else is delegated to `read_latin1_simd`.
const SIMD_THRESHOLD: usize = 128;
28
29pub struct Writer<'a> {
30 pub(crate) bf: &'a mut Vec<u8>,
31}
32impl<'a> Writer<'a> {
33 #[inline(always)]
36 pub fn from_buffer(bf: &'a mut Vec<u8>) -> Writer<'a> {
37 Writer { bf }
38 }
39
40 #[inline(always)]
41 pub fn dump(&self) -> Vec<u8> {
42 self.bf.clone()
43 }
44
45 #[inline(always)]
46 pub fn reset(&mut self) {
47 self.bf.clear();
48 }
49
50 #[inline(always)]
51 pub fn len(&self) -> usize {
52 self.bf.len()
53 }
54
55 #[inline(always)]
56 pub fn is_empty(&self) -> bool {
57 self.bf.is_empty()
58 }
59
60 #[inline(always)]
61 pub fn reserve(&mut self, additional: usize) {
62 if self.bf.capacity() - self.len() < additional {
63 self.bf.reserve(max(additional * 2, self.bf.capacity()));
64 }
65 }
66
67 #[inline(always)]
68 pub fn skip(&mut self, len: usize) {
69 self.bf.resize(self.bf.len() + len, 0);
70 }
71
72 #[inline(always)]
73 pub fn set_bytes(&mut self, offset: usize, data: &[u8]) {
74 self.bf
75 .get_mut(offset..offset + data.len())
76 .unwrap()
77 .copy_from_slice(data);
78 }
79
80 #[inline(always)]
81 pub fn write_bytes(&mut self, v: &[u8]) -> usize {
82 self.bf.extend_from_slice(v);
83 v.len()
84 }
85
86 #[inline(always)]
89 pub fn write_bool(&mut self, value: bool) {
90 self.bf.push(if value { 1 } else { 0 });
91 }
92
93 #[inline(always)]
96 pub fn write_i8(&mut self, value: i8) {
97 self.bf.push(value as u8);
98 }
99
100 #[inline(always)]
103 pub fn write_i16(&mut self, value: i16) {
104 self.write_u16(value as u16);
105 }
106
107 #[inline(always)]
110 pub fn write_i32(&mut self, value: i32) {
111 self.write_u32(value as u32);
112 }
113
114 #[inline(always)]
117 pub fn write_var_i32(&mut self, value: i32) {
118 let zigzag = ((value as u32) << 1) ^ ((value >> 31) as u32);
119 self._write_var_u32(zigzag)
120 }
121
122 #[inline(always)]
125 pub fn write_i64(&mut self, value: i64) {
126 self.write_u64(value as u64);
127 }
128
129 #[inline(always)]
132 pub fn write_var_i64(&mut self, value: i64) {
133 let zigzag = ((value as u64) << 1) ^ ((value >> 63) as u64);
134 self._write_var_u64(zigzag);
135 }
136
137 #[inline(always)]
144 pub fn write_tagged_i64(&mut self, value: i64) {
145 const HALF_MIN_INT_VALUE: i64 = i32::MIN as i64 / 2; const HALF_MAX_INT_VALUE: i64 = i32::MAX as i64 / 2; if (HALF_MIN_INT_VALUE..=HALF_MAX_INT_VALUE).contains(&value) {
148 let v = (value as i32) << 1;
150 self.write_i32(v);
151 } else {
152 self.bf.push(0b1);
154 self.write_i64(value);
155 }
156 }
157
158 #[inline(always)]
161 pub fn write_u8(&mut self, value: u8) {
162 self.bf.push(value);
163 }
164
165 #[inline(always)]
168 pub fn write_u16(&mut self, value: u16) {
169 #[cfg(target_endian = "little")]
170 {
171 let bytes = unsafe { &*(&value as *const u16 as *const [u8; 2]) };
172 self.bf.extend_from_slice(bytes);
173 }
174 #[cfg(target_endian = "big")]
175 {
176 self.bf.extend_from_slice(&value.to_le_bytes());
177 }
178 }
179
180 #[inline(always)]
183 pub fn write_u32(&mut self, value: u32) {
184 #[cfg(target_endian = "little")]
185 {
186 let bytes = unsafe { &*(&value as *const u32 as *const [u8; 4]) };
187 self.bf.extend_from_slice(bytes);
188 }
189 #[cfg(target_endian = "big")]
190 {
191 self.bf.extend_from_slice(&value.to_le_bytes());
192 }
193 }
194
195 #[inline(always)]
198 pub fn write_var_u32(&mut self, value: u32) {
199 self._write_var_u32(value)
200 }
201
202 #[inline(always)]
203 fn _write_var_u32(&mut self, value: u32) {
204 if value < 0x80 {
205 self.bf.push(value as u8);
206 } else if value < 0x4000 {
207 let u1 = ((value as u8) & 0x7F) | 0x80;
209 let u2 = (value >> 7) as u8;
210 self.write_u16(((u2 as u16) << 8) | u1 as u16);
211 } else if value < 0x200000 {
212 let u1 = ((value as u8) & 0x7F) | 0x80;
214 let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
215 let u3 = (value >> 14) as u8;
216 self.write_u16(((u2 as u16) << 8) | u1 as u16);
217 self.bf.push(u3);
218 } else if value < 0x10000000 {
219 let u1 = ((value as u8) & 0x7F) | 0x80;
221 let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
222 let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
223 let u4 = (value >> 21) as u8;
224 self.write_u32(
225 ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
226 );
227 } else {
228 let u1 = ((value as u8) & 0x7F) | 0x80;
230 let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
231 let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
232 let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
233 let u5 = (value >> 28) as u8;
234 self.write_u32(
235 ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
236 );
237 self.bf.push(u5);
238 }
239 }
240
241 #[inline(always)]
244 pub fn write_u64(&mut self, value: u64) {
245 #[cfg(target_endian = "little")]
246 {
247 let bytes = unsafe { &*(&value as *const u64 as *const [u8; 8]) };
248 self.bf.extend_from_slice(bytes);
249 }
250 #[cfg(target_endian = "big")]
251 {
252 self.bf.extend_from_slice(&value.to_le_bytes());
253 }
254 }
255
256 #[inline(always)]
259 pub fn write_var_u64(&mut self, value: u64) {
260 self._write_var_u64(value);
261 }
262
263 #[inline(always)]
264 fn _write_var_u64(&mut self, value: u64) {
265 if value < 0x80 {
266 self.bf.push(value as u8);
267 } else if value < 0x4000 {
268 let u1 = ((value as u8) & 0x7F) | 0x80;
269 let u2 = (value >> 7) as u8;
270 self.write_u16(((u2 as u16) << 8) | u1 as u16);
271 } else if value < 0x200000 {
272 let u1 = ((value as u8) & 0x7F) | 0x80;
273 let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
274 let u3 = (value >> 14) as u8;
275 self.write_u16(((u2 as u16) << 8) | u1 as u16);
276 self.bf.push(u3);
277 } else if value < 0x10000000 {
278 let u1 = ((value as u8) & 0x7F) | 0x80;
279 let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
280 let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
281 let u4 = (value >> 21) as u8;
282 self.write_u32(
283 ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
284 );
285 } else if value < 0x800000000 {
286 let u1 = ((value as u8) & 0x7F) | 0x80;
287 let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
288 let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
289 let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
290 let u5 = (value >> 28) as u8;
291 self.write_u32(
292 ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
293 );
294 self.bf.push(u5);
295 } else if value < 0x40000000000 {
296 let u1 = ((value as u8) & 0x7F) | 0x80;
297 let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
298 let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
299 let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
300 let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
301 let u6 = (value >> 35) as u8;
302 self.write_u32(
303 ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
304 );
305 self.write_u16(((u6 as u16) << 8) | u5 as u16);
306 } else if value < 0x2000000000000 {
307 let u1 = ((value as u8) & 0x7F) | 0x80;
308 let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
309 let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
310 let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
311 let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
312 let u6 = (((value >> 35) as u8) & 0x7F) | 0x80;
313 let u7 = (value >> 42) as u8;
314 self.write_u32(
315 ((u4 as u32) << 24) | ((u3 as u32) << 16) | ((u2 as u32) << 8) | u1 as u32,
316 );
317 self.write_u16(((u6 as u16) << 8) | u5 as u16);
318 self.bf.push(u7);
319 } else if value < 0x100000000000000 {
320 let u1 = ((value as u8) & 0x7F) | 0x80;
321 let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
322 let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
323 let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
324 let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
325 let u6 = (((value >> 35) as u8) & 0x7F) | 0x80;
326 let u7 = (((value >> 42) as u8) & 0x7F) | 0x80;
327 let u8 = (value >> 49) as u8;
328 self.write_u64(
329 (u8 as u64) << 56
330 | (u7 as u64) << 48
331 | (u6 as u64) << 40
332 | (u5 as u64) << 32
333 | (u4 as u64) << 24
334 | (u3 as u64) << 16
335 | (u2 as u64) << 8
336 | (u1 as u64),
337 );
338 } else {
339 let u1 = ((value as u8) & 0x7F) | 0x80;
340 let u2 = (((value >> 7) as u8) & 0x7F) | 0x80;
341 let u3 = (((value >> 14) as u8) & 0x7F) | 0x80;
342 let u4 = (((value >> 21) as u8) & 0x7F) | 0x80;
343 let u5 = (((value >> 28) as u8) & 0x7F) | 0x80;
344 let u6 = (((value >> 35) as u8) & 0x7F) | 0x80;
345 let u7 = (((value >> 42) as u8) & 0x7F) | 0x80;
346 let u8 = (((value >> 49) as u8) & 0x7F) | 0x80;
347 let u9 = (value >> 56) as u8;
348 self.write_u64(
349 (u8 as u64) << 56
350 | (u7 as u64) << 48
351 | (u6 as u64) << 40
352 | (u5 as u64) << 32
353 | (u4 as u64) << 24
354 | (u3 as u64) << 16
355 | (u2 as u64) << 8
356 | (u1 as u64),
357 );
358 self.bf.push(u9);
359 }
360 }
361
362 #[inline(always)]
368 pub fn write_tagged_u64(&mut self, value: u64) {
369 if value <= i32::MAX as u64 {
370 let v = (value as u32) << 1;
372 self.write_u32(v);
373 } else {
374 self.bf.push(0b1);
376 self.write_u64(value);
377 }
378 }
379
380 #[inline(always)]
383 pub fn write_f32(&mut self, value: f32) {
384 #[cfg(target_endian = "little")]
385 {
386 let bytes = unsafe { &*(&value as *const f32 as *const [u8; 4]) };
387 self.bf.extend_from_slice(bytes);
388 }
389 #[cfg(target_endian = "big")]
390 {
391 self.bf.extend_from_slice(&value.to_bits().to_le_bytes());
392 }
393 }
394
395 #[inline(always)]
397 pub fn write_f16(&mut self, value: float16) {
398 self.write_u16(value.to_bits());
399 }
400
401 #[inline(always)]
403 pub fn write_bf16(&mut self, value: bfloat16) {
404 self.write_u16(value.to_bits());
405 }
406
407 #[inline(always)]
410 pub fn write_f64(&mut self, value: f64) {
411 #[cfg(target_endian = "little")]
412 {
413 let bytes = unsafe { &*(&value as *const f64 as *const [u8; 8]) };
414 self.bf.extend_from_slice(bytes);
415 }
416 #[cfg(target_endian = "big")]
417 {
418 self.bf.extend_from_slice(&value.to_bits().to_le_bytes());
419 }
420 }
421
422 #[inline(always)]
425 pub fn write_utf8_string(&mut self, s: &str) {
426 let bytes = s.as_bytes();
427 let len = bytes.len();
428 self.bf.reserve(len);
429 self.bf.extend_from_slice(bytes);
430 }
431
432 #[inline(always)]
435 pub fn write_i128(&mut self, value: i128) {
436 self.write_u128(value as u128);
437 }
438
439 #[inline(always)]
440 pub fn write_u128(&mut self, value: u128) {
441 #[cfg(target_endian = "little")]
442 {
443 let bytes = unsafe { &*(&value as *const u128 as *const [u8; 16]) };
444 self.bf.extend_from_slice(bytes);
445 }
446 #[cfg(target_endian = "big")]
447 {
448 self.bf.extend_from_slice(&value.to_le_bytes());
449 }
450 }
451
452 #[inline(always)]
453 pub fn write_isize(&mut self, value: isize) {
454 const SIZE: usize = std::mem::size_of::<isize>();
455 match SIZE {
456 2 => self.write_i16(value as i16),
457 4 => self.write_var_i32(value as i32),
458 8 => self.write_var_i64(value as i64),
459 _ => unreachable!("unsupported isize size"),
460 }
461 }
462
463 #[inline(always)]
464 pub fn write_usize(&mut self, value: usize) {
465 const SIZE: usize = std::mem::size_of::<usize>();
466 match SIZE {
467 2 => self.write_u16(value as u16),
468 4 => self.write_var_u32(value as u32),
469 8 => self.write_var_u64(value as u64),
470 _ => unreachable!("unsupported usize size"),
471 }
472 }
473
474 #[inline(always)]
477 pub fn write_var_u36_small(&mut self, value: u64) {
478 assert!(
479 value < (1u64 << 36),
480 "value too large for 36-bit variable-length integer"
481 );
482 if value < 0x80 {
483 self.bf.push(value as u8);
484 } else if value < 0x4000 {
485 let b0 = ((value & 0x7F) as u8) | 0x80;
486 let b1 = (value >> 7) as u8;
487 let combined = ((b1 as u16) << 8) | (b0 as u16);
488 self.write_u16(combined);
489 } else if value < 0x200000 {
490 let b0 = (value & 0x7F) | 0x80;
491 let b1 = ((value >> 7) & 0x7F) | 0x80;
492 let b2 = value >> 14;
493 let combined = b0 | (b1 << 8) | (b2 << 16);
494 self.write_u32(combined as u32);
495 } else if value < 0x10000000 {
496 let b0 = (value & 0x7F) | 0x80;
497 let b1 = ((value >> 7) & 0x7F) | 0x80;
498 let b2 = ((value >> 14) & 0x7F) | 0x80;
499 let b3 = value >> 21;
500 let combined = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
501 self.write_u32(combined as u32);
502 } else {
503 let b0 = (value & 0x7F) | 0x80;
504 let b1 = ((value >> 7) & 0x7F) | 0x80;
505 let b2 = ((value >> 14) & 0x7F) | 0x80;
506 let b3 = ((value >> 21) & 0x7F) | 0x80;
507 let b4 = value >> 28;
508 let combined = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24) | (b4 << 32);
509 self.write_u64(combined);
510 }
511 }
512}
513
514#[derive(Default)]
515#[allow(clippy::needless_lifetimes)]
516pub struct Reader<'a> {
517 pub(crate) bf: &'a [u8],
518 pub(crate) cursor: usize,
519}
520
521#[allow(clippy::needless_lifetimes)]
522impl<'a> Reader<'a> {
523 #[inline(always)]
526 pub fn new(bf: &[u8]) -> Reader<'_> {
527 Reader { bf, cursor: 0 }
528 }
529
530 #[inline(always)]
531 pub(crate) fn move_next(&mut self, additional: usize) {
532 self.cursor += additional;
533 }
534
535 #[inline(always)]
536 pub(crate) fn move_back(&mut self, additional: usize) {
537 self.cursor -= additional;
538 }
539
540 #[inline(always)]
541 pub fn sub_slice(&self, start: usize, end: usize) -> Result<&[u8], Error> {
542 if start > self.bf.len() || end > self.bf.len() || end < start {
544 Err(Error::buffer_out_of_bound(
545 start,
546 self.bf.len(),
547 self.bf.len(),
548 ))
549 } else {
550 Ok(&self.bf[start..end])
551 }
552 }
553
554 #[inline(always)]
555 pub fn slice_after_cursor(&self) -> &[u8] {
556 &self.bf[self.cursor..]
557 }
558
559 #[inline(always)]
560 pub fn get_cursor(&self) -> usize {
561 self.cursor
562 }
563
564 #[inline(always)]
565 fn value_at(&self, index: usize) -> Result<u8, Error> {
566 match self.bf.get(index) {
567 None => Err(Error::buffer_out_of_bound(
568 index,
569 self.bf.len(),
570 self.bf.len(),
571 )),
572 Some(v) => Ok(*v),
573 }
574 }
575
576 #[inline(always)]
577 fn check_bound(&self, n: usize) -> Result<(), Error> {
578 let end = self
579 .cursor
580 .checked_add(n)
581 .ok_or_else(|| Error::buffer_out_of_bound(self.cursor, n, self.bf.len()))?;
582 if end > self.bf.len() {
583 Err(Error::buffer_out_of_bound(self.cursor, n, self.bf.len()))
584 } else {
585 Ok(())
586 }
587 }
588
589 #[inline(always)]
590 fn read_u8_uncheck(&mut self) -> u8 {
591 let result = unsafe { self.bf.get_unchecked(self.cursor) };
592 self.move_next(1);
593 *result
594 }
595
596 #[inline(always)]
597 pub fn skip(&mut self, len: usize) -> Result<(), Error> {
598 self.check_bound(len)?;
599 self.move_next(len);
600 Ok(())
601 }
602
603 #[inline(always)]
604 pub fn read_bytes(&mut self, len: usize) -> Result<&[u8], Error> {
605 self.check_bound(len)?;
606 let result = &self.bf[self.cursor..self.cursor + len];
607 self.move_next(len);
608 Ok(result)
609 }
610
611 #[inline(always)]
612 pub fn reset_cursor_to_here(&self) -> impl FnOnce(&mut Self) {
613 let raw_cursor = self.cursor;
614 move |this: &mut Self| {
615 this.cursor = raw_cursor;
616 }
617 }
618
619 pub fn set_cursor(&mut self, cursor: usize) {
620 self.cursor = cursor;
621 }
622
623 #[inline(always)]
626 pub fn read_bool(&mut self) -> Result<bool, Error> {
627 Ok(self.read_u8()? != 0)
628 }
629
630 #[inline(always)]
633 pub fn read_i8(&mut self) -> Result<i8, Error> {
634 Ok(self.read_u8()? as i8)
635 }
636
637 #[inline(always)]
640 pub fn read_i16(&mut self) -> Result<i16, Error> {
641 Ok(self.read_u16()? as i16)
642 }
643
644 #[inline(always)]
647 pub fn read_i32(&mut self) -> Result<i32, Error> {
648 Ok(self.read_u32()? as i32)
649 }
650
651 #[inline(always)]
654 pub fn read_var_i32(&mut self) -> Result<i32, Error> {
655 let encoded = self.read_var_u32()?;
656 Ok(((encoded >> 1) as i32) ^ -((encoded & 1) as i32))
657 }
658
659 #[inline(always)]
662 pub fn read_i64(&mut self) -> Result<i64, Error> {
663 Ok(self.read_u64()? as i64)
664 }
665
666 #[inline(always)]
669 pub fn read_var_i64(&mut self) -> Result<i64, Error> {
670 let encoded = self.read_var_u64()?;
671 Ok(((encoded >> 1) as i64) ^ -((encoded & 1) as i64))
672 }
673
674 #[inline(always)]
680 pub fn read_tagged_i64(&mut self) -> Result<i64, Error> {
681 self.check_bound(4)?;
682 let i = LittleEndian::read_i32(&self.bf[self.cursor..]);
683 if (i & 0b1) != 0b1 {
684 self.cursor += 4;
686 Ok((i >> 1) as i64) } else {
688 self.check_bound(9)?;
690 self.cursor += 1;
691 let value = LittleEndian::read_i64(&self.bf[self.cursor..]);
692 self.cursor += 8;
693 Ok(value)
694 }
695 }
696
697 #[inline(always)]
700 pub fn peek_u8(&mut self) -> Result<u8, Error> {
701 let result = self.value_at(self.cursor)?;
702 Ok(result)
703 }
704
705 #[inline(always)]
706 pub fn read_u8(&mut self) -> Result<u8, Error> {
707 let result = self.value_at(self.cursor)?;
708 self.move_next(1);
709 Ok(result)
710 }
711
712 #[inline(always)]
715 pub fn read_u16(&mut self) -> Result<u16, Error> {
716 self.check_bound(2)?;
717 let result = LittleEndian::read_u16(&self.bf[self.cursor..self.cursor + 2]);
718 self.cursor += 2;
719 Ok(result)
720 }
721
722 #[inline(always)]
725 pub fn read_u32(&mut self) -> Result<u32, Error> {
726 self.check_bound(4)?;
727 let result = LittleEndian::read_u32(&self.bf[self.cursor..self.cursor + 4]);
728 self.cursor += 4;
729 Ok(result)
730 }
731
732 #[inline(always)]
735 pub fn read_var_u32(&mut self) -> Result<u32, Error> {
736 let b0 = self.value_at(self.cursor)? as u32;
737 if b0 < 0x80 {
738 self.move_next(1);
739 return Ok(b0);
740 }
741
742 let b1 = self.value_at(self.cursor + 1)? as u32;
743 let mut encoded = (b0 & 0x7F) | ((b1 & 0x7F) << 7);
744 if b1 < 0x80 {
745 self.move_next(2);
746 return Ok(encoded);
747 }
748
749 let b2 = self.value_at(self.cursor + 2)? as u32;
750 encoded |= (b2 & 0x7F) << 14;
751 if b2 < 0x80 {
752 self.move_next(3);
753 return Ok(encoded);
754 }
755
756 let b3 = self.value_at(self.cursor + 3)? as u32;
757 encoded |= (b3 & 0x7F) << 21;
758 if b3 < 0x80 {
759 self.move_next(4);
760 return Ok(encoded);
761 }
762
763 let b4 = self.value_at(self.cursor + 4)? as u32;
764 encoded |= b4 << 28;
765 self.move_next(5);
766 Ok(encoded)
767 }
768
769 #[inline(always)]
772 pub fn read_u64(&mut self) -> Result<u64, Error> {
773 self.check_bound(8)?;
774 let result = LittleEndian::read_u64(&self.bf[self.cursor..self.cursor + 8]);
775 self.cursor += 8;
776 Ok(result)
777 }
778
779 #[inline(always)]
782 pub fn read_var_u64(&mut self) -> Result<u64, Error> {
783 let b0 = self.value_at(self.cursor)? as u64;
784 if b0 < 0x80 {
785 self.move_next(1);
786 return Ok(b0);
787 }
788
789 let b1 = self.value_at(self.cursor + 1)? as u64;
790 let mut result = (b0 & 0x7F) | ((b1 & 0x7F) << 7);
791 if b1 < 0x80 {
792 self.move_next(2);
793 return Ok(result);
794 }
795
796 let b2 = self.value_at(self.cursor + 2)? as u64;
797 result |= (b2 & 0x7F) << 14;
798 if b2 < 0x80 {
799 self.move_next(3);
800 return Ok(result);
801 }
802
803 let b3 = self.value_at(self.cursor + 3)? as u64;
804 result |= (b3 & 0x7F) << 21;
805 if b3 < 0x80 {
806 self.move_next(4);
807 return Ok(result);
808 }
809
810 let b4 = self.value_at(self.cursor + 4)? as u64;
811 result |= (b4 & 0x7F) << 28;
812 if b4 < 0x80 {
813 self.move_next(5);
814 return Ok(result);
815 }
816
817 let b5 = self.value_at(self.cursor + 5)? as u64;
818 result |= (b5 & 0x7F) << 35;
819 if b5 < 0x80 {
820 self.move_next(6);
821 return Ok(result);
822 }
823
824 let b6 = self.value_at(self.cursor + 6)? as u64;
825 result |= (b6 & 0x7F) << 42;
826 if b6 < 0x80 {
827 self.move_next(7);
828 return Ok(result);
829 }
830
831 let b7 = self.value_at(self.cursor + 7)? as u64;
832 result |= (b7 & 0x7F) << 49;
833 if b7 < 0x80 {
834 self.move_next(8);
835 return Ok(result);
836 }
837
838 let b8 = self.value_at(self.cursor + 8)? as u64;
839 result |= (b8 & 0xFF) << 56;
840 self.move_next(9);
841 Ok(result)
842 }
843
844 #[inline(always)]
850 pub fn read_tagged_u64(&mut self) -> Result<u64, Error> {
851 self.check_bound(4)?;
852 let i = LittleEndian::read_u32(&self.bf[self.cursor..]);
853 if (i & 0b1) != 0b1 {
854 self.cursor += 4;
856 Ok((i >> 1) as u64)
857 } else {
858 self.check_bound(9)?;
860 self.cursor += 1;
861 let value = LittleEndian::read_u64(&self.bf[self.cursor..]);
862 self.cursor += 8;
863 Ok(value)
864 }
865 }
866
867 #[inline(always)]
870 pub fn read_f32(&mut self) -> Result<f32, Error> {
871 self.check_bound(4)?;
872 let result = LittleEndian::read_f32(&self.bf[self.cursor..self.cursor + 4]);
873 self.cursor += 4;
874 Ok(result)
875 }
876
877 #[inline(always)]
879 pub fn read_f16(&mut self) -> Result<float16, Error> {
880 self.check_bound(2)?;
881 let bits = LittleEndian::read_u16(&self.bf[self.cursor..self.cursor + 2]);
882 self.cursor += 2;
883 Ok(float16::from_bits(bits))
884 }
885
886 #[inline(always)]
887 pub fn read_bf16(&mut self) -> Result<bfloat16, Error> {
888 self.check_bound(2)?;
889 let bits = LittleEndian::read_u16(&self.bf[self.cursor..self.cursor + 2]);
890 self.cursor += 2;
891 Ok(bfloat16::from_bits(bits))
892 }
893
894 pub fn read_f64(&mut self) -> Result<f64, Error> {
895 self.check_bound(8)?;
896 let result = LittleEndian::read_f64(&self.bf[self.cursor..self.cursor + 8]);
897 self.cursor += 8;
898 Ok(result)
899 }
900
901 #[inline(always)]
904 pub fn read_latin1_string(&mut self, len: usize) -> Result<String, Error> {
905 self.check_bound(len)?;
906 if len < SIMD_THRESHOLD {
907 unsafe {
909 let src = self.sub_slice(self.cursor, self.cursor + len)?;
910
911 let is_ascii = src.iter().all(|&b| b < 0x80);
913
914 if is_ascii {
915 let mut vec = Vec::with_capacity(len);
917 let dst = vec.as_mut_ptr();
918 std::ptr::copy_nonoverlapping(src.as_ptr(), dst, len);
919 vec.set_len(len);
920 self.move_next(len);
921 Ok(String::from_utf8_unchecked(vec))
922 } else {
923 let mut out: Vec<u8> = Vec::with_capacity(len * 2);
925 let out_ptr = out.as_mut_ptr();
926 let mut out_len = 0;
927
928 for &b in src {
929 if b < 0x80 {
930 *out_ptr.add(out_len) = b;
931 out_len += 1;
932 } else {
933 *out_ptr.add(out_len) = 0xC0 | (b >> 6);
935 *out_ptr.add(out_len + 1) = 0x80 | (b & 0x3F);
936 out_len += 2;
937 }
938 }
939
940 out.set_len(out_len);
941 self.move_next(len);
942 Ok(String::from_utf8_unchecked(out))
943 }
944 }
945 } else {
946 read_latin1_simd(self, len)
948 }
949 }
950
951 #[inline(always)]
952 pub fn read_utf8_string(&mut self, len: usize) -> Result<String, Error> {
953 self.check_bound(len)?;
954 let src = &self.bf[self.cursor..self.cursor + len];
955 let string =
958 std::str::from_utf8(src).map_err(|_| Error::encoding_error("invalid UTF-8 string"))?;
959 let string = string.to_owned();
960 self.move_next(len);
961 Ok(string)
962 }
963
964 #[inline(always)]
965 pub fn read_utf8_string_unchecked(&mut self, len: usize) -> Result<String, Error> {
966 self.check_bound(len)?;
967 unsafe {
969 let mut vec = Vec::with_capacity(len);
970 let src = self.bf.as_ptr().add(self.cursor);
971 let dst = vec.as_mut_ptr();
972 std::ptr::copy_nonoverlapping(src, dst, len);
974 vec.set_len(len);
975 self.move_next(len);
976 Ok(String::from_utf8_unchecked(vec))
977 }
978 }
979
980 #[inline(always)]
981 pub fn read_utf16_string(&mut self, len: usize) -> Result<String, Error> {
982 self.check_bound(len)?;
983 let slice = self.sub_slice(self.cursor, self.cursor + len)?;
984 let units: Vec<u16> = slice
985 .chunks_exact(2)
986 .map(|c| u16::from_le_bytes([c[0], c[1]]))
987 .collect();
988 self.move_next(len);
989 Ok(String::from_utf16_lossy(&units))
990 }
991
992 #[inline(always)]
995 pub fn read_i128(&mut self) -> Result<i128, Error> {
996 Ok(self.read_u128()? as i128)
997 }
998
999 #[inline(always)]
1000 pub fn read_u128(&mut self) -> Result<u128, Error> {
1001 self.check_bound(16)?;
1002 let result = LittleEndian::read_u128(&self.bf[self.cursor..self.cursor + 16]);
1003 self.cursor += 16;
1004 Ok(result)
1005 }
1006
1007 #[inline(always)]
1008 pub fn read_isize(&mut self) -> Result<isize, Error> {
1009 const SIZE: usize = std::mem::size_of::<isize>();
1010 match SIZE {
1011 2 => Ok(self.read_i16()? as isize),
1012 4 => Ok(self.read_var_i32()? as isize),
1013 8 => Ok(self.read_var_i64()? as isize),
1014 _ => unreachable!("unsupported isize size"),
1015 }
1016 }
1017
1018 #[inline(always)]
1019 pub fn read_usize(&mut self) -> Result<usize, Error> {
1020 const SIZE: usize = std::mem::size_of::<usize>();
1021 match SIZE {
1022 2 => Ok(self.read_u16()? as usize),
1023 4 => Ok(self.read_var_u32()? as usize),
1024 8 => Ok(self.read_var_u64()? as usize),
1025 _ => unreachable!("unsupported usize size"),
1026 }
1027 }
1028
1029 #[inline(always)]
1032 pub fn read_var_u36_small(&mut self) -> Result<u64, Error> {
1033 self.check_bound(0)?;
1035 let start = self.cursor;
1036 let slice = self.slice_after_cursor();
1037
1038 if slice.len() >= 8 {
1039 let bulk = self.read_u64()?;
1041 let mut result = bulk & 0x7F;
1042 let mut read_idx = start;
1043
1044 if (bulk & 0x80) != 0 {
1045 read_idx += 1;
1046 result |= (bulk >> 1) & 0x3F80;
1047 if (bulk & 0x8000) != 0 {
1048 read_idx += 1;
1049 result |= (bulk >> 2) & 0x1FC000;
1050 if (bulk & 0x800000) != 0 {
1051 read_idx += 1;
1052 result |= (bulk >> 3) & 0xFE00000;
1053 if (bulk & 0x80000000) != 0 {
1054 read_idx += 1;
1055 result |= (bulk >> 4) & 0xFF0000000;
1056 }
1057 }
1058 }
1059 }
1060 self.cursor = read_idx + 1;
1061 return Ok(result);
1062 }
1063
1064 let mut result = 0u64;
1065 let mut shift = 0;
1066 while self.cursor < self.bf.len() {
1067 let b = self.read_u8_uncheck();
1068 result |= ((b & 0x7F) as u64) << shift;
1069 if (b & 0x80) == 0 {
1070 break;
1071 }
1072 shift += 7;
1073 if shift >= 36 {
1074 return Err(Error::encode_error("var_u36_small overflow"));
1075 }
1076 }
1077 Ok(result)
1078 }
1079}
1080
1081#[allow(clippy::needless_lifetimes)]
1082unsafe impl<'a> Send for Reader<'a> {}
1083#[allow(clippy::needless_lifetimes)]
1084unsafe impl<'a> Sync for Reader<'a> {}