qubit_codec/codec/codec.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Low-level value codec trait.
11
12use core::num::NonZeroUsize;
13
/// Encodes and decodes one value or codec quantum against a unit buffer.
///
/// `Codec` is the lowest-level abstraction in the codec stack. It is intended
/// for hot paths that have already validated buffer capacity and want to avoid
/// constructing subslices for every value. Higher-level transcoders and
/// convenience APIs are responsible for checked buffer management and owned
/// output allocation.
///
/// `min_units_per_value` and `max_units_per_value` describe the representation
/// width bounds for one value. The minimum is a lower-bound hint for checked
/// layers: if fewer than this many units are available, no complete value can
/// exist, so a streaming caller can either request more input or report an
/// incomplete EOF tail. For decoding, this minimum is the smallest safety
/// precondition checked callers must satisfy before entering
/// [`decode_unchecked`](Self::decode_unchecked). The maximum is the conservative
/// bound callers normally use to prove that unchecked writes stay inside the
/// provided output buffer.
///
/// # Associated Types
///
/// - `Value`: Logical value decoded from or encoded into the buffer. This may be
///   a scalar such as `u64`, a `char`, or a fixed quantum such as `[u8; 3]`.
/// - `Unit`: Buffer unit used by the encoded representation. Units must be
///   `Copy`.
/// - `DecodeError`: Error reported when decoding malformed units.
/// - `EncodeError`: Error reported when encoding an unsupported value.
///
/// # Safety
///
/// Implementors must uphold the safety contract documented by
/// [`decode_unchecked`](Self::decode_unchecked) and
/// [`encode_unchecked`](Self::encode_unchecked). In particular, unchecked
/// implementations must not read or write outside the caller-provided ranges.
/// Implementations should use `debug_assert!` to state the expected buffer
/// bounds at the unchecked entry point.
///
/// Implementations must also guarantee that
/// [`min_units_per_value`](Self::min_units_per_value) is less than or equal to
/// [`max_units_per_value`](Self::max_units_per_value). Both bounds are non-zero
/// by type, and `max_units_per_value` must be a valid upper bound for one
/// complete encoded value or codec quantum. Checked adapters assert this
/// invariant before using codec-provided bounds.
pub unsafe trait Codec {
    /// Logical value decoded from or encoded into the buffer.
    type Value;

    /// Buffer unit used by the encoded representation.
    type Unit: Copy;

    /// Error reported when decoding malformed units.
    type DecodeError;

    /// Error reported when encoding an unsupported value.
    type EncodeError;

    /// Returns the minimum possible unit count for one encoded value.
    ///
    /// This is a lower bound used by checked callers for planning and fast
    /// impossibility checks. If a streaming decoder has fewer than this many
    /// readable units, no complete value can be present at the current position.
    /// If the stream has reached EOF, such a tail is necessarily incomplete;
    /// otherwise the caller should read more input. Similarly, an encoder or
    /// transcoder can avoid calling into the codec when the remaining output
    /// capacity is smaller than this lower bound.
    ///
    /// This value does not prove that encoding will fit. For variable-width
    /// representations, a value may require more units, up to
    /// [`max_units_per_value`](Self::max_units_per_value). For decoding, this is
    /// the minimum safety precondition required by
    /// [`decode_unchecked`](Self::decode_unchecked); if fewer units are
    /// available, a checked caller must request more input or report a closed
    /// incomplete tail without calling into the unchecked method.
    ///
    /// # Returns
    ///
    /// Returns a non-zero lower bound for one complete value. Variable-width
    /// codecs such as LEB128 should return the shortest valid representation
    /// length. For example, a UTF-16 byte codec can return `2`, while its
    /// maximum is `4` because a surrogate pair needs four bytes.
    #[must_use]
    fn min_units_per_value(&self) -> NonZeroUsize;

    /// Returns the maximum non-zero unit count needed to encode or decode one value.
    ///
    /// This is the capacity that callers of
    /// [`encode_unchecked`](Self::encode_unchecked) must guarantee is writable
    /// from the start index, and the largest written unit count a successful
    /// encode may report. It must never be smaller than
    /// [`min_units_per_value`](Self::min_units_per_value).
    ///
    /// # Returns
    ///
    /// Returns an upper bound for one complete value or codec quantum.
    #[must_use]
    fn max_units_per_value(&self) -> NonZeroUsize;

    /// Decodes one value from `input` starting at `index`.
    ///
    /// # Parameters
    ///
    /// - `input`: Source unit buffer.
    /// - `index`: Start index in `input`.
    ///
    /// # Returns
    ///
    /// Returns the decoded value and the non-zero number of consumed units.
    ///
    /// # Errors
    ///
    /// Returns `Self::DecodeError` when the units are malformed, non-canonical,
    /// incomplete, or otherwise invalid for this codec. The concrete error type
    /// carries the codec-specific reason and context.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `index` is a valid boundary in `input`
    /// and that at least [`min_units_per_value`](Self::min_units_per_value)
    /// units are readable from `index`. Implementations must not read beyond the
    /// currently available units under that precondition. They may return
    /// `Self::DecodeError` when those units are a valid but incomplete prefix.
    ///
    /// On success, implementations must return a consumed unit count no larger
    /// than the available input. The return type guarantees that successful
    /// decoding always consumes at least one unit. Implementations should use
    /// `debug_assert!` to state these unchecked entry-point assumptions.
    unsafe fn decode_unchecked(
        &self,
        input: &[Self::Unit],
        index: usize,
    ) -> Result<(Self::Value, NonZeroUsize), Self::DecodeError>;

    /// Encodes one borrowed value into `output` starting at `index`.
    ///
    /// # Parameters
    ///
    /// - `value`: Value to encode.
    /// - `output`: Destination unit buffer.
    /// - `index`: Start index in `output`.
    ///
    /// # Returns
    ///
    /// Returns the number of written units. Implementations may return `0` to
    /// represent a value that intentionally emits no encoded units, which is
    /// why the count is a plain `usize` rather than a `NonZeroUsize`.
    ///
    /// # Errors
    ///
    /// Returns `Self::EncodeError` when `value` cannot be represented by this
    /// codec.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that the implementation can write up to
    /// [`max_units_per_value`](Self::max_units_per_value) units starting at
    /// `index`. On success, implementations must return a written unit count no
    /// larger than [`max_units_per_value`](Self::max_units_per_value), and, per
    /// the trait-level contract, must not write outside that caller-provided
    /// range.
    unsafe fn encode_unchecked(
        &self,
        value: &Self::Value,
        output: &mut [Self::Unit],
        index: usize,
    ) -> Result<usize, Self::EncodeError>;
}
167
168/// Asserts the public unit-bound invariant required by [`Codec`].
169///
170/// # Type Parameters
171///
172/// - `C`: Codec implementation to validate.
173///
174/// # Returns
175///
176/// Returns unit `()`.
177///
178/// # Panics
179///
180/// Panics when [`Codec::min_units_per_value`] is greater than
181/// [`Codec::max_units_per_value`].
182pub(crate) fn assert_unit_bounds<C>(codec: &C)
183where
184 C: Codec,
185{
186 assert!(
187 codec.min_units_per_value() <= codec.max_units_per_value(),
188 "Codec::min_units_per_value() must not exceed Codec::max_units_per_value()",
189 );
190}