pack_io/codec.rs
1//! The codec primitives: the [`Encode`] / [`Decode`] behaviour traits, the
2//! concrete in-memory [`Encoder`] / [`Decoder`] types, the [`Config`] struct,
3//! and the Tier-1 [`encode`] / [`decode`] free functions.
4//!
5//! ## Layering
6//!
7//! - **Tier 1** — the [`encode`] / [`decode`] free functions. One line each
8//! direction, no setup, no type parameters beyond the target type.
9//! - **Tier 2** — concrete encoder / decoder types. The in-memory pair
10//! ([`Encoder`] + [`Decoder`]) lives in this module; the streaming pair
11//! ([`crate::IoEncoder`] + [`crate::IoDecoder`]) lives in
12//! [`crate::io`] and is `std`-gated. All four implement the [`Encode`] /
13//! [`Decode`] behaviour traits, so [`Serialize`] / [`Deserialize`] impls
14//! work through any of them.
15//! - **Tier 3** — implementing the [`Serialize`] / [`Deserialize`] traits
16//! directly on your own types. Generic over `E: Encode` / `D: Decode`, so
17//! one impl works for both in-memory and streaming codecs.
18//!
19//! ## Safety contract for decoders
20//!
21//! Every method on [`Decode`] is total: it either returns the requested
22//! value (advancing the read cursor) or returns a [`SerialError`]. It never
23//! panics, never reads past the input, and never allocates more memory than
24//! the [`Config::max_alloc`] cap permits.
25
26use alloc::vec;
27use alloc::vec::Vec;
28
29use crate::error::{Result, SerialError};
30use crate::traits::{Deserialize, Serialize};
31use crate::varint;
32
/// Knobs that govern a decode session.
///
/// The configuration is checked once, when a decoder is constructed
/// ([`Decoder::with_config`] / [`crate::IoDecoder::with_config`]), rather
/// than on every operation. A config with `max_alloc == 0` is invalid and is
/// rejected before any input byte is read.
///
/// The struct is `#[non_exhaustive]` so further knobs can be added in a
/// MINOR release without breaking downstream code. Obtain a value through
/// [`Config::new`] / [`Config::with_max_alloc`] or via [`Default`].
///
/// # Examples
///
/// ```
/// use pack_io::{Config, Decoder};
///
/// // Cap every single length-prefixed value (a `String`, a `Vec<u8>`, a
/// // collection element count, …) at 16 KiB, so a hostile producer that
/// // sends a multi-gigabyte length prefix is refused immediately.
/// let cfg = Config::new().with_max_alloc(16 * 1024);
/// let dec = Decoder::with_config(&[], cfg).expect("non-zero cap");
/// drop(dec);
/// ```
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Config {
    /// Largest number of bytes the decoder may allocate for one
    /// length-prefixed value (a `String`, a `Vec<u8>`, a collection element
    /// count, …).
    ///
    /// Defaults to 1 GiB: generous enough that well-formed input is never
    /// turned away on size, yet still a defence against the obvious
    /// hostile-length-prefix DoS. Lower it wherever untrusted input from a
    /// low-budget producer is accepted.
    pub max_alloc: usize,
}
69
70impl Default for Config {
71 fn default() -> Self {
72 Self::new()
73 }
74}
75
76impl Config {
77 /// Default configuration: `max_alloc = 1 GiB`.
78 ///
79 /// 1 GiB is large enough to be irrelevant for well-formed inputs and
80 /// small enough to refuse the obvious `length = u64::MAX` attack before
81 /// allocating a single byte.
82 ///
83 /// # Examples
84 ///
85 /// ```
86 /// let cfg = pack_io::Config::new();
87 /// assert_eq!(cfg.max_alloc, 1 << 30);
88 /// ```
89 #[must_use]
90 pub const fn new() -> Self {
91 Self { max_alloc: 1 << 30 }
92 }
93
94 /// Replace `max_alloc` and return the updated config.
95 ///
96 /// # Examples
97 ///
98 /// ```
99 /// let cfg = pack_io::Config::new().with_max_alloc(4096);
100 /// assert_eq!(cfg.max_alloc, 4096);
101 /// ```
102 #[must_use]
103 pub const fn with_max_alloc(mut self, max_alloc: usize) -> Self {
104 self.max_alloc = max_alloc;
105 self
106 }
107
108 /// Validate the configuration. Returns an error if any field is
109 /// nonsensical.
110 pub(crate) fn validate(self) -> Result<Self> {
111 if self.max_alloc == 0 {
112 return Err(SerialError::InvalidLength {
113 declared: 0,
114 remaining: 0,
115 });
116 }
117 Ok(self)
118 }
119}
120
121// ---------------------------------------------------------------------------
122// Encode / Decode behaviour traits
123// ---------------------------------------------------------------------------
124
125/// Sink that a [`Serialize`] implementation writes its wire-format bytes
126/// into.
127///
128/// Implemented by every concrete encoder in the crate ([`Encoder`] for the
129/// in-memory case, [`crate::IoEncoder`] for `std::io::Write` streams). User
130/// code rarely implements `Encode` directly — `Serialize` impls are written
131/// generically over `E: Encode` so a single impl works for every encoder
132/// flavour.
133///
134/// # Examples
135///
136/// ```
137/// use pack_io::{Encode, Encoder, Result};
138///
139/// // A helper that writes a length-prefixed list of `u32`s into any encoder.
140/// fn write_u32_list<E: Encode>(enc: &mut E, items: &[u32]) -> Result<()> {
141/// enc.write_varint_u64(items.len() as u64)?;
142/// for item in items {
143/// enc.write_varint_u64(u64::from(*item))?;
144/// }
145/// Ok(())
146/// }
147///
148/// let mut enc = Encoder::new();
149/// write_u32_list(&mut enc, &[1, 2, 3]).unwrap();
150/// ```
151pub trait Encode {
152 /// Append a single byte.
153 ///
154 /// # Errors
155 ///
156 /// Returns the encoder's underlying error variant (I/O failure for
157 /// streaming encoders; never errors for the in-memory [`Encoder`]).
158 fn write_byte(&mut self, byte: u8) -> Result<()>;
159
160 /// Append a slice of bytes.
161 ///
162 /// # Errors
163 ///
164 /// Same as [`Encode::write_byte`].
165 fn write_bytes(&mut self, bytes: &[u8]) -> Result<()>;
166
167 /// Hint that the caller is about to write `additional` more bytes.
168 ///
169 /// In-memory encoders MAY pre-allocate the requested capacity to avoid
170 /// intermediate `Vec` growth. Streaming encoders typically ignore the
171 /// hint. The default implementation is a no-op.
172 #[inline]
173 fn reserve(&mut self, additional: usize) {
174 let _ = additional;
175 }
176
177 /// Append a `u64` as an unsigned LEB128 varint (1–10 bytes).
178 ///
179 /// # Errors
180 ///
181 /// Same as [`Encode::write_bytes`].
182 #[inline]
183 fn write_varint_u64(&mut self, value: u64) -> Result<()> {
184 // Fast path for the overwhelmingly common case: value fits in a
185 // single byte. Skips the stack buffer + write_bytes round-trip.
186 if value < 0x80 {
187 return self.write_byte(value as u8);
188 }
189 let mut buf = [0u8; varint::MAX_VARINT_LEN_U64];
190 let n = varint::write_u64(value, &mut buf);
191 self.write_bytes(&buf[..n])
192 }
193
194 /// Append a `u128` as an unsigned LEB128 varint (1–19 bytes).
195 ///
196 /// # Errors
197 ///
198 /// Same as [`Encode::write_bytes`].
199 #[inline]
200 fn write_varint_u128(&mut self, value: u128) -> Result<()> {
201 let mut buf = [0u8; varint::MAX_VARINT_LEN_U128];
202 let n = varint::write_u128(value, &mut buf);
203 self.write_bytes(&buf[..n])
204 }
205}
206
207/// Source that a [`Deserialize`] implementation reads its wire-format bytes
208/// from.
209///
210/// Implemented by every concrete decoder in the crate ([`Decoder`] for the
211/// in-memory case, [`crate::IoDecoder`] for `std::io::Read` streams). User
212/// code rarely implements `Decode` directly — `Deserialize` impls are
213/// written generically over `D: Decode`.
214///
215/// All methods are **total**: on any byte sequence they either succeed
216/// (advancing the cursor) or return a [`SerialError`]. They never panic,
217/// never read past the input, and never allocate more memory than
218/// [`Decode::max_alloc`] permits.
219pub trait Decode {
220 /// Read the next byte, advancing the cursor.
221 ///
222 /// # Errors
223 ///
224 /// Returns [`SerialError::UnexpectedEof`] if the input is exhausted.
225 /// Streaming decoders MAY return an I/O-flavoured error variant.
226 fn read_byte(&mut self) -> Result<u8>;
227
228 /// Fill `out` with exactly `out.len()` bytes, advancing the cursor.
229 ///
230 /// # Errors
231 ///
232 /// Returns [`SerialError::UnexpectedEof`] on short read.
233 fn read_into(&mut self, out: &mut [u8]) -> Result<()>;
234
235 /// Maximum number of bytes the decoder will allocate for a single
236 /// length-prefixed value. Mirrors [`Config::max_alloc`].
237 fn max_alloc(&self) -> usize;
238
239 /// Read a LEB128 varint as a `u64`.
240 ///
241 /// # Errors
242 ///
243 /// Returns [`SerialError::VarintOverflow`] for an overlong encoding,
244 /// or [`SerialError::UnexpectedEof`] for a truncated one.
245 #[inline]
246 fn read_varint_u64(&mut self) -> Result<u64> {
247 // Fast path for single-byte varints (values 0..=127, the
248 // overwhelmingly common case for length prefixes and small ints).
249 let first = self.read_byte()?;
250 if first < 0x80 {
251 return Ok(u64::from(first));
252 }
253 let mut result: u64 = u64::from(first & 0x7f);
254 let mut shift: u32 = 7;
255 for consumed in 2..=varint::MAX_VARINT_LEN_U64 {
256 let byte = self.read_byte()?;
257 // The 10th byte may only set bit 0 — anything else overflows u64.
258 if consumed == varint::MAX_VARINT_LEN_U64 && (byte & 0xfe) != 0 {
259 return Err(SerialError::VarintOverflow);
260 }
261 result |= u64::from(byte & 0x7f) << shift;
262 if byte & 0x80 == 0 {
263 return Ok(result);
264 }
265 shift += 7;
266 }
267 Err(SerialError::VarintOverflow)
268 }
269
270 /// Read a LEB128 varint as a `u128`.
271 ///
272 /// # Errors
273 ///
274 /// See [`Decode::read_varint_u64`].
275 #[inline]
276 fn read_varint_u128(&mut self) -> Result<u128> {
277 let mut result: u128 = 0;
278 let mut shift: u32 = 0;
279 for consumed in 1..=varint::MAX_VARINT_LEN_U128 {
280 let byte = self.read_byte()?;
281 // The 19th byte may only set the low two bits.
282 if consumed == varint::MAX_VARINT_LEN_U128 && (byte & 0xfc) != 0 {
283 return Err(SerialError::VarintOverflow);
284 }
285 result |= u128::from(byte & 0x7f) << shift;
286 if byte & 0x80 == 0 {
287 return Ok(result);
288 }
289 shift += 7;
290 }
291 Err(SerialError::VarintOverflow)
292 }
293
294 /// Read a length-prefixed byte run, allocating a fresh `Vec<u8>`.
295 ///
296 /// The length is read as a varint, validated against
297 /// [`Decode::max_alloc`], then the corresponding number of bytes is
298 /// read from the underlying source.
299 ///
300 /// # Errors
301 ///
302 /// - [`SerialError::InvalidLength`] if the prefix exceeds `max_alloc`.
303 /// - [`SerialError::UnexpectedEof`] if the source runs out before the
304 /// declared length is satisfied.
305 #[inline]
306 fn read_length_prefixed(&mut self) -> Result<Vec<u8>> {
307 let declared = self.read_varint_u64()?;
308 let max = self.max_alloc() as u64;
309 if declared > max {
310 return Err(SerialError::InvalidLength {
311 declared,
312 remaining: 0,
313 });
314 }
315 let len = declared as usize;
316 let mut buf = vec![0u8; len];
317 self.read_into(&mut buf)?;
318 Ok(buf)
319 }
320}
321
322// ---------------------------------------------------------------------------
323// In-memory Encoder
324// ---------------------------------------------------------------------------
325
/// Encoder that accumulates its output in an owned `Vec<u8>`.
///
/// One encoder can serve many messages: [`Encoder::take`] hands back the
/// bytes written so far and leaves the encoder empty for the next one.
///
/// `Encoder` implements [`Encode`], so any [`Serialize`] impl written
/// generically over `E: Encode` can write straight into it.
///
/// # Examples
///
/// ```
/// use pack_io::Encoder;
///
/// let mut enc = Encoder::new();
/// enc.write(&7_u64).unwrap();
/// enc.write(&"hello").unwrap();
/// let bytes = enc.into_inner();
/// assert!(bytes.len() > 0);
/// ```
#[derive(Debug, Default)]
pub struct Encoder {
    // Bytes emitted so far, in write order.
    out: Vec<u8>,
}
347
348impl Encoder {
349 /// Construct an encoder with an empty output buffer.
350 ///
351 /// # Examples
352 ///
353 /// ```
354 /// let enc = pack_io::Encoder::new();
355 /// assert!(enc.as_bytes().is_empty());
356 /// ```
357 #[must_use]
358 pub fn new() -> Self {
359 Self { out: Vec::new() }
360 }
361
362 /// Construct an encoder with an output buffer pre-allocated to
363 /// `capacity` bytes.
364 ///
365 /// Choose this over [`Encoder::new`] when the encoded size is roughly
366 /// known: a single `Vec::with_capacity` up front avoids the four to
367 /// eight grow-and-copy reallocations that a zero-capacity `Vec`
368 /// performs while doubling to the final size.
369 ///
370 /// `capacity` is a hint — the encoder still grows the buffer if the
371 /// encoded value exceeds it. Setting it slightly too high is harmless;
372 /// setting it slightly too low costs at most one growth.
373 ///
374 /// The Tier-1 [`crate::encode`] free function uses a small default
375 /// capacity internally so most one-shot encodes never grow at all.
376 ///
377 /// # Examples
378 ///
379 /// ```
380 /// let enc = pack_io::Encoder::with_capacity(256);
381 /// assert!(enc.as_bytes().is_empty());
382 /// ```
383 #[must_use]
384 pub fn with_capacity(capacity: usize) -> Self {
385 Self {
386 out: Vec::with_capacity(capacity),
387 }
388 }
389
390 /// Construct an encoder backed by `buffer`. The encoder appends to the
391 /// buffer rather than allocating its own — callers that re-use a single
392 /// `Vec<u8>` across many encodes avoid the per-call allocation.
393 ///
394 /// # Examples
395 ///
396 /// ```
397 /// use pack_io::Encoder;
398 ///
399 /// let buf = Vec::with_capacity(64);
400 /// let mut enc = Encoder::into_buffer(buf);
401 /// enc.write(&42_u64).unwrap();
402 /// let buf = enc.into_inner();
403 /// assert!(!buf.is_empty());
404 /// ```
405 #[must_use]
406 pub fn into_buffer(buffer: Vec<u8>) -> Self {
407 Self { out: buffer }
408 }
409
410 /// Borrow the encoded bytes accumulated so far, without consuming the
411 /// encoder.
412 ///
413 /// Prefer this over [`Encoder::into_inner`] when the caller wants to
414 /// send / hash / inspect the bytes but keep writing more values.
415 ///
416 /// # Examples
417 ///
418 /// ```
419 /// use pack_io::Encoder;
420 ///
421 /// let mut enc = Encoder::new();
422 /// enc.write(&1_u64).unwrap();
423 /// let snapshot = enc.as_bytes().to_vec();
424 /// enc.write(&2_u64).unwrap();
425 /// assert_eq!(snapshot, &[0x01]);
426 /// assert_eq!(enc.as_bytes(), &[0x01, 0x02]);
427 /// ```
428 #[inline]
429 #[must_use]
430 pub fn as_bytes(&self) -> &[u8] {
431 &self.out
432 }
433
434 /// Consume the encoder and return its underlying buffer.
435 ///
436 /// The returned `Vec<u8>` is the exact bytes accumulated by every
437 /// preceding `write` call.
438 ///
439 /// # Examples
440 ///
441 /// ```
442 /// use pack_io::Encoder;
443 ///
444 /// let mut enc = Encoder::new();
445 /// enc.write(&7_u64).unwrap();
446 /// let bytes: Vec<u8> = enc.into_inner();
447 /// assert_eq!(bytes, &[0x07]);
448 /// ```
449 #[inline]
450 #[must_use]
451 pub fn into_inner(self) -> Vec<u8> {
452 self.out
453 }
454
455 /// Swap the encoder's buffer with a fresh empty one, returning the bytes
456 /// written so far. Useful for "encode then send" loops that want to
457 /// re-use the encoder across many messages.
458 ///
459 /// After `take`, the encoder is empty and ready to encode the next
460 /// message. The returned buffer is the previous contents.
461 ///
462 /// # Examples
463 ///
464 /// ```
465 /// use pack_io::Encoder;
466 ///
467 /// let mut enc = Encoder::new();
468 /// enc.write(&1_u64).unwrap();
469 /// let first = enc.take();
470 /// enc.write(&2_u64).unwrap();
471 /// let second = enc.take();
472 /// assert_eq!(first, &[0x01]);
473 /// assert_eq!(second, &[0x02]);
474 /// assert!(enc.as_bytes().is_empty());
475 /// ```
476 #[must_use]
477 pub fn take(&mut self) -> Vec<u8> {
478 core::mem::take(&mut self.out)
479 }
480
481 /// Encode `value`, appending its bytes to the internal buffer.
482 ///
483 /// This is the [`Serialize`]-aware sibling of [`Encode::write_bytes`].
484 /// Prefer it for typed values; reserve `write_bytes` for raw byte
485 /// passthrough.
486 ///
487 /// # Errors
488 ///
489 /// Propagates any error returned by the type's [`Serialize`]
490 /// implementation. The built-in primitive and collection impls in this
491 /// crate never error on an in-memory encoder.
492 ///
493 /// # Examples
494 ///
495 /// Writing several values in sequence:
496 ///
497 /// ```
498 /// use pack_io::Encoder;
499 ///
500 /// let mut enc = Encoder::new();
501 /// enc.write(&7_u64).unwrap();
502 /// enc.write(&"hello").unwrap();
503 /// enc.write(&vec![1u8, 2, 3]).unwrap();
504 /// assert!(!enc.as_bytes().is_empty());
505 /// ```
506 ///
507 /// Encoding a tuple in one call (anything that implements `Serialize`
508 /// works):
509 ///
510 /// ```
511 /// use pack_io::Encoder;
512 ///
513 /// let mut enc = Encoder::new();
514 /// enc.write(&(7_u64, "hello", true)).unwrap();
515 /// assert!(!enc.as_bytes().is_empty());
516 /// ```
517 #[inline]
518 pub fn write<T: Serialize + ?Sized>(&mut self, value: &T) -> Result<()> {
519 value.serialize(self)
520 }
521}
522
523impl Encode for Encoder {
524 #[inline(always)]
525 fn write_byte(&mut self, byte: u8) -> Result<()> {
526 self.out.push(byte);
527 Ok(())
528 }
529
530 #[inline(always)]
531 fn write_bytes(&mut self, bytes: &[u8]) -> Result<()> {
532 self.out.extend_from_slice(bytes);
533 Ok(())
534 }
535
536 #[inline(always)]
537 fn reserve(&mut self, additional: usize) {
538 self.out.reserve(additional);
539 }
540
541 /// Override of [`Encode::write_varint_u64`] specialised for the in-memory
542 /// encoder. Pushes each varint byte directly onto the underlying `Vec`,
543 /// reserving the full max-width up front so the loop never re-checks
544 /// capacity. Avoids the stack-buffer + `extend_from_slice` round-trip
545 /// the default impl would perform.
546 #[inline]
547 fn write_varint_u64(&mut self, value: u64) -> Result<()> {
548 if value < 0x80 {
549 self.out.push(value as u8);
550 return Ok(());
551 }
552 // Up to 10 bytes for u64. Reserve once, then push without further
553 // capacity checks.
554 self.out.reserve(varint::MAX_VARINT_LEN_U64);
555 let mut n = value;
556 while n >= 0x80 {
557 self.out.push((n as u8) | 0x80);
558 n >>= 7;
559 }
560 self.out.push(n as u8);
561 Ok(())
562 }
563
564 /// Same specialisation as [`Encode::write_varint_u64`], widened to 128
565 /// bits.
566 #[inline]
567 fn write_varint_u128(&mut self, value: u128) -> Result<()> {
568 if value < 0x80 {
569 self.out.push(value as u8);
570 return Ok(());
571 }
572 self.out.reserve(varint::MAX_VARINT_LEN_U128);
573 let mut n = value;
574 while n >= 0x80 {
575 self.out.push((n as u8) | 0x80);
576 n >>= 7;
577 }
578 self.out.push(n as u8);
579 Ok(())
580 }
581}
582
583// ---------------------------------------------------------------------------
584// In-memory Decoder
585// ---------------------------------------------------------------------------
586
587/// In-memory decoder. Borrows from an input slice and advances a position
588/// pointer as values are read. Bounds-checked on every operation.
589///
590/// Implements [`Decode`], so [`Deserialize`] impls written generically over
591/// `D: Decode` work directly through it.
592///
593/// # Examples
594///
595/// ```
596/// use pack_io::{Encoder, Decoder};
597///
598/// let mut enc = Encoder::new();
599/// enc.write(&7_u64).unwrap();
600/// enc.write(&true).unwrap();
601/// let bytes = enc.into_inner();
602///
603/// let mut dec = Decoder::new(&bytes);
604/// let n: u64 = dec.read().unwrap();
605/// let b: bool = dec.read().unwrap();
606/// assert_eq!(n, 7);
607/// assert!(b);
608/// assert!(dec.is_empty());
609/// ```
610#[derive(Debug)]
611pub struct Decoder<'a> {
612 input: &'a [u8],
613 pos: usize,
614 config: Config,
615}
616
617impl<'a> Decoder<'a> {
618 /// Construct a decoder over `bytes` using the default [`Config`]
619 /// (1 GiB `max_alloc`).
620 ///
621 /// For tighter allocation limits on untrusted input, use
622 /// [`Decoder::with_config`] instead.
623 ///
624 /// # Examples
625 ///
626 /// ```
627 /// use pack_io::Decoder;
628 ///
629 /// let bytes = pack_io::encode(&42_u64).unwrap();
630 /// let mut dec = Decoder::new(&bytes);
631 /// let n: u64 = dec.read().unwrap();
632 /// assert_eq!(n, 42);
633 /// ```
634 #[inline]
635 #[must_use]
636 pub fn new(bytes: &'a [u8]) -> Self {
637 Self {
638 input: bytes,
639 pos: 0,
640 config: Config::default(),
641 }
642 }
643
644 /// Construct a decoder with the supplied configuration.
645 ///
646 /// Use this when the input comes from an untrusted producer and the
647 /// caller wants to bound per-value allocations.
648 ///
649 /// # Errors
650 ///
651 /// Returns [`SerialError::InvalidLength`] if `config.max_alloc == 0`.
652 ///
653 /// # Examples
654 ///
655 /// ```
656 /// use pack_io::{Config, Decoder};
657 ///
658 /// let cfg = Config::new().with_max_alloc(16 * 1024); // 16 KiB cap
659 /// let bytes = pack_io::encode(&"hello").unwrap();
660 /// let mut dec = Decoder::with_config(&bytes, cfg).unwrap();
661 /// let s: String = dec.read().unwrap();
662 /// assert_eq!(s, "hello");
663 /// ```
664 pub fn with_config(bytes: &'a [u8], config: Config) -> Result<Self> {
665 Ok(Self {
666 input: bytes,
667 pos: 0,
668 config: config.validate()?,
669 })
670 }
671
672 /// Bytes consumed so far from the start of the input.
673 ///
674 /// # Examples
675 ///
676 /// ```
677 /// use pack_io::Decoder;
678 ///
679 /// let bytes = pack_io::encode(&(1_u8, 2_u8)).unwrap();
680 /// let mut dec = Decoder::new(&bytes);
681 /// assert_eq!(dec.position(), 0);
682 /// let _: u8 = dec.read().unwrap();
683 /// assert_eq!(dec.position(), 1);
684 /// ```
685 #[inline]
686 #[must_use]
687 pub fn position(&self) -> usize {
688 self.pos
689 }
690
691 /// Number of bytes remaining in the input.
692 ///
693 /// # Examples
694 ///
695 /// ```
696 /// use pack_io::Decoder;
697 ///
698 /// let bytes = pack_io::encode(&(1_u8, 2_u8, 3_u8)).unwrap();
699 /// let mut dec = Decoder::new(&bytes);
700 /// assert_eq!(dec.remaining(), 3);
701 /// let _: u8 = dec.read().unwrap();
702 /// assert_eq!(dec.remaining(), 2);
703 /// ```
704 #[inline]
705 #[must_use]
706 pub fn remaining(&self) -> usize {
707 self.input.len().saturating_sub(self.pos)
708 }
709
710 /// True when there are no more bytes to read.
711 ///
712 /// Useful as the loop condition for multi-value decode passes that
713 /// don't have an explicit count up front.
714 ///
715 /// # Examples
716 ///
717 /// ```
718 /// use pack_io::{Encoder, Decoder};
719 ///
720 /// let mut enc = Encoder::new();
721 /// enc.write(&1_u64).unwrap();
722 /// enc.write(&2_u64).unwrap();
723 /// let bytes = enc.into_inner();
724 ///
725 /// let mut dec = Decoder::new(&bytes);
726 /// let mut sum = 0_u64;
727 /// while !dec.is_empty() {
728 /// sum += dec.read::<u64>().unwrap();
729 /// }
730 /// assert_eq!(sum, 3);
731 /// ```
732 #[inline]
733 #[must_use]
734 pub fn is_empty(&self) -> bool {
735 self.remaining() == 0
736 }
737
738 /// Decode a value of type `T` from the current position, advancing the
739 /// cursor by the number of bytes the value consumed.
740 ///
741 /// # Errors
742 ///
743 /// Returns any [`SerialError`] surfaced by `T::deserialize`.
744 ///
745 /// # Examples
746 ///
747 /// Reading several values in sequence:
748 ///
749 /// ```
750 /// use pack_io::{Encoder, Decoder};
751 ///
752 /// let mut enc = Encoder::new();
753 /// enc.write(&7_u64).unwrap();
754 /// enc.write(&"hello").unwrap();
755 /// let bytes = enc.into_inner();
756 ///
757 /// let mut dec = Decoder::new(&bytes);
758 /// let n: u64 = dec.read().unwrap();
759 /// let s: String = dec.read().unwrap();
760 /// assert_eq!((n, s.as_str()), (7, "hello"));
761 /// ```
762 ///
763 /// Reading a tuple in a single call:
764 ///
765 /// ```
766 /// use pack_io::Decoder;
767 ///
768 /// let bytes = pack_io::encode(&(7_u64, true)).unwrap();
769 /// let mut dec = Decoder::new(&bytes);
770 /// let pair: (u64, bool) = dec.read().unwrap();
771 /// assert_eq!(pair, (7, true));
772 /// ```
773 #[inline]
774 pub fn read<T: Deserialize>(&mut self) -> Result<T> {
775 T::deserialize(self)
776 }
777
778 /// Read a length-prefixed byte run as a **borrowed** slice of the
779 /// underlying input — no allocation, no copy.
780 ///
781 /// The borrowed slice has the same lifetime `'a` as the decoder's
782 /// input buffer, which lets caller-side `&'a str` / `&'a [u8]` decode
783 /// paths return a borrow directly into that buffer. This is the seam
784 /// the zero-copy [`crate::DeserializeView`] surface plugs into for
785 /// `&'a str` and `&'a [u8]`.
786 ///
787 /// # Errors
788 ///
789 /// - [`SerialError::InvalidLength`] if the prefix exceeds the
790 /// configured `max_alloc`, OR exceeds the remaining input.
791 /// - [`SerialError::UnexpectedEof`] is folded into `InvalidLength` for
792 /// this method, since the buffer length is known up front and a
793 /// declared length running off the end is logically a length-prefix
794 /// error, not a streaming EOF.
795 #[inline]
796 pub fn read_length_prefixed_borrowed(&mut self) -> Result<&'a [u8]> {
797 let declared = <Self as Decode>::read_varint_u64(self)?;
798 let max = self.config.max_alloc as u64;
799 if declared > max {
800 return Err(SerialError::InvalidLength {
801 declared,
802 remaining: self.remaining(),
803 });
804 }
805 let len = declared as usize;
806 let remaining = self.remaining();
807 if len > remaining {
808 return Err(SerialError::InvalidLength {
809 declared,
810 remaining,
811 });
812 }
813 let start = self.pos;
814 let end = start + len;
815 let slice = &self.input[start..end];
816 self.pos = end;
817 Ok(slice)
818 }
819}
820
821impl Decode for Decoder<'_> {
822 #[inline]
823 fn read_byte(&mut self) -> Result<u8> {
824 match self.input.get(self.pos) {
825 Some(&b) => {
826 self.pos += 1;
827 Ok(b)
828 }
829 None => Err(SerialError::UnexpectedEof {
830 needed: 1,
831 remaining: 0,
832 }),
833 }
834 }
835
836 #[inline]
837 fn read_into(&mut self, out: &mut [u8]) -> Result<()> {
838 let n = out.len();
839 let remaining = self.remaining();
840 if n > remaining {
841 return Err(SerialError::UnexpectedEof {
842 needed: n,
843 remaining,
844 });
845 }
846 let start = self.pos;
847 let end = start + n;
848 out.copy_from_slice(&self.input[start..end]);
849 self.pos = end;
850 Ok(())
851 }
852
853 #[inline]
854 fn max_alloc(&self) -> usize {
855 self.config.max_alloc
856 }
857
858 /// In-memory specialisation: validates length against the actual buffer
859 /// length too, not just `max_alloc`. Catches truncated inputs without
860 /// allocating.
861 #[inline]
862 fn read_length_prefixed(&mut self) -> Result<Vec<u8>> {
863 let declared = self.read_varint_u64()?;
864 let max = self.config.max_alloc as u64;
865 if declared > max {
866 return Err(SerialError::InvalidLength {
867 declared,
868 remaining: self.remaining(),
869 });
870 }
871 let len = declared as usize;
872 let remaining = self.remaining();
873 if len > remaining {
874 return Err(SerialError::InvalidLength {
875 declared,
876 remaining,
877 });
878 }
879 let start = self.pos;
880 let end = start + len;
881 let slice = &self.input[start..end];
882 self.pos = end;
883 Ok(slice.to_vec())
884 }
885}
886
887// ---------------------------------------------------------------------------
888// Tier-1 free functions
889// ---------------------------------------------------------------------------
890
891/// Encode `value` into a freshly allocated `Vec<u8>`.
892///
893/// This is the **Tier-1** entry point — the one-line surface for the common
894/// case. Allocates one buffer sized to fit the encoded value.
895///
896/// # Examples
897///
898/// ```
899/// let bytes = pack_io::encode(&42_u64).unwrap();
900/// let back: u64 = pack_io::decode(&bytes).unwrap();
901/// assert_eq!(back, 42);
902/// ```
903///
904/// # Errors
905///
906/// Propagates any error returned by the type's [`Serialize`] implementation.
907/// The built-in primitive and collection impls never error on an in-memory
908/// encoder.
909#[inline]
910pub fn encode<T: Serialize + ?Sized>(value: &T) -> Result<Vec<u8>> {
911 // Pre-reserve enough to hold a typical small-to-medium message in a
912 // single allocation. A zero-capacity `Vec` doubles 8+ times before
913 // hitting 512 bytes, with each doubling memcpy-ing the prior contents
914 // — accounting for a large fraction of the encode-time gap vs codecs
915 // that pre-size their output buffer. 512 bytes covers most network
916 // messages without growth; larger payloads pay at most one or two
917 // doublings instead of the eight-plus a fresh `Vec` would.
918 let mut enc = Encoder::with_capacity(512);
919 value.serialize(&mut enc)?;
920 Ok(enc.into_inner())
921}
922
923/// Peek the schema version of a payload produced by a `#[pack_io(version = N)]`
924/// type without consuming the buffer.
925///
926/// Reads only the leading varint and returns it as `u32`, leaving the
927/// caller free to dispatch decode to the right `T` based on what they find.
928/// On a non-versioned payload (no `#[pack_io(version = N)]` on the type)
929/// this returns whatever the first varint of the encoding happens to be —
930/// callers should only use it on payloads they know are versioned.
931///
932/// # Examples
933///
934/// ```
935/// # #[cfg(feature = "derive")] {
936/// use pack_io::{encode, peek_version, Serialize, Deserialize};
937///
938/// #[derive(Serialize, Deserialize)]
939/// #[pack_io(version = 2)]
940/// struct Msg { id: u64 }
941///
942/// let bytes = encode(&Msg { id: 7 }).unwrap();
943/// assert_eq!(peek_version(&bytes).unwrap(), 2);
944/// # }
945/// ```
946///
947/// # Errors
948///
949/// - [`SerialError::UnexpectedEof`] if `bytes` is empty or the leading
950/// varint is truncated.
951/// - [`SerialError::VarintOverflow`] / [`SerialError::IntegerOutOfRange`]
952/// if the leading varint does not fit in `u32`.
953#[inline]
954pub fn peek_version(bytes: &[u8]) -> Result<u32> {
955 let mut dec = Decoder::new(bytes);
956 let v = dec.read_varint_u64()?;
957 u32::try_from(v).map_err(|_| SerialError::IntegerOutOfRange)
958}
959
960/// Decode a value of type `T` from `bytes`, requiring the input to be fully
961/// consumed.
962///
963/// This is the **Tier-1** entry point — the one-line surface for the common
964/// case. After the value has been read, the decoder checks that no bytes
965/// remain; trailing input is reported as [`SerialError::TrailingBytes`].
966/// Callers that want to read several values from a single buffer should use
967/// [`Decoder`] directly.
968///
969/// # Examples
970///
971/// ```
972/// let bytes = pack_io::encode(&"hello").unwrap();
973/// let back: String = pack_io::decode(&bytes).unwrap();
974/// assert_eq!(back, "hello");
975/// ```
976///
977/// # Errors
978///
979/// - Returns [`SerialError::TrailingBytes`] when extra bytes follow the value.
980/// - Propagates any [`SerialError`] from the type's [`Deserialize`] impl.
981#[inline]
982pub fn decode<T: Deserialize>(bytes: &[u8]) -> Result<T> {
983 let mut dec = Decoder::new(bytes);
984 let value = T::deserialize(&mut dec)?;
985 let remaining = dec.remaining();
986 if remaining != 0 {
987 return Err(SerialError::TrailingBytes { remaining });
988 }
989 Ok(value)
990}
991
#[cfg(test)]
mod tests {
    use super::*;

    // The default config must carry the documented 1 GiB cap.
    #[test]
    fn config_default_has_one_gib_cap() {
        assert_eq!(Config::default().max_alloc, 1 << 30);
    }

    // A zero allocation cap is refused at construction time.
    #[test]
    fn decoder_with_zero_cap_is_rejected() {
        let outcome = Decoder::with_config(&[], Config::new().with_max_alloc(0));
        assert!(matches!(outcome, Err(SerialError::InvalidLength { .. })));
    }

    // `into_buffer` appends after the caller's existing bytes.
    #[test]
    fn encoder_into_buffer_reuses_caller_vec() {
        let mut seed = Vec::with_capacity(64);
        seed.push(0xff);

        let mut enc = Encoder::into_buffer(seed);
        enc.write(&7_u64).unwrap();

        let out = enc.into_inner();
        assert_eq!(out.first(), Some(&0xff));
        assert!(out.len() > 1);
    }

    // `take` hands back what was written and leaves the encoder empty.
    #[test]
    fn encoder_take_returns_buffer_and_resets() {
        let mut enc = Encoder::new();

        enc.write(&1_u64).unwrap();
        let first = enc.take();
        assert!(!first.is_empty());
        assert!(enc.as_bytes().is_empty());

        enc.write(&2_u64).unwrap();
        assert_eq!(enc.take(), [0x02]);
    }

    // The Tier-1 `decode` insists on consuming the whole input.
    #[test]
    fn decode_rejects_trailing_bytes() {
        let mut bytes = encode(&7_u8).unwrap();
        bytes.push(0xff);

        let outcome = decode::<u8>(&bytes);
        assert!(matches!(
            outcome,
            Err(SerialError::TrailingBytes { remaining: 1 })
        ));
    }

    // Reading beyond the last byte reports EOF instead of panicking.
    #[test]
    fn decoder_read_past_end_returns_unexpected_eof() {
        let mut dec = Decoder::new(&[0x01]);
        dec.read::<u8>().unwrap();

        let outcome = dec.read::<u8>();
        assert!(matches!(outcome, Err(SerialError::UnexpectedEof { .. })));
    }

    // A declared length of 5 against a cap of 4 is an invalid length.
    #[test]
    fn decoder_length_prefix_above_cap_is_rejected() {
        let bytes = [0x05, b'h', b'e', b'l', b'l', b'o'];
        let cfg = Config::new().with_max_alloc(4);
        let mut dec = Decoder::with_config(&bytes, cfg).expect("non-zero cap");

        let outcome = dec.read_length_prefixed();
        assert!(matches!(
            outcome,
            Err(SerialError::InvalidLength { declared: 5, .. })
        ));
    }

    // A declared length of 16 with only two payload bytes present.
    #[test]
    fn decoder_length_prefix_overflowing_remaining_is_rejected() {
        let bytes = [0x10, b'a', b'b'];
        let mut dec = Decoder::new(&bytes);

        let outcome = dec.read_length_prefixed();
        assert!(matches!(outcome, Err(SerialError::InvalidLength { .. })));
    }

    // `position` tracks both single-byte and bulk reads.
    #[test]
    fn decoder_position_advances_with_reads() {
        let bytes = [0x01, 0x02, 0x03];
        let mut dec = Decoder::new(&bytes);
        assert_eq!(dec.position(), 0);

        dec.read_byte().unwrap();
        assert_eq!(dec.position(), 1);

        let mut pair = [0u8; 2];
        dec.read_into(&mut pair).unwrap();
        assert_eq!(dec.position(), 3);
        assert!(dec.is_empty());
    }

    // Asking for four bytes when only two exist is a short read.
    #[test]
    fn read_into_short_read_is_rejected() {
        let mut dec = Decoder::new(&[0x01, 0x02]);
        let mut wanted = [0u8; 4];

        let outcome = dec.read_into(&mut wanted);
        assert!(matches!(outcome, Err(SerialError::UnexpectedEof { .. })));
    }
}
1093}