Skip to main content

qubit_codec/buffered/
buffered_decode_engine.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Reusable buffered decoder engine.
11
12use core::num::NonZeroUsize;
13
14use super::{
15    buffered_decode_hooks::BufferedDecodeHooks,
16    decode_action::DecodeAction,
17    decode_context::DecodeContext,
18    decode_state::DecodeState,
19    decode_step::DecodeStep,
20    finish_error::FinishError,
21    transcode_progress::TranscodeProgress,
22};
23use crate::{
24    CapacityError,
25    Codec,
26    codec::assert_unit_bounds,
27};
28
29/// Reusable buffered decoding engine for codec-backed decoders.
30///
31/// The engine owns the low-level codec and hook object. It keeps the common
32/// buffered decoding loop private: input-index validation, output-capacity
33/// checks, calls to [`Codec::decode_unchecked`], hook dispatch, and
34/// [`crate::TranscodeStatus`] reporting. Incomplete input tails are left in the
35/// caller-provided input slice; callers own input-buffer refill.
36///
37/// Use this type to build a streaming decoder over a one-value [`Codec`]. The
38/// engine decodes into a caller-provided output slice and returns
39/// [`TranscodeProgress`] instead of allocating. On success it writes decoded
40/// values directly to output. On codec errors it delegates to
41/// [`crate::BufferedDecodeHooks`], allowing a policy to request more input,
42/// skip invalid units, emit a replacement value, or fail.
43///
44/// The engine stops before reading an incomplete value when fewer than
45/// [`Codec::min_units_per_value`] units are available. For variable-width
46/// codecs, the codec may still return an incomplete decode error after that
47/// minimum is satisfied; hooks should convert that error into
48/// [`crate::DecodeAction::NeedInput`] when the stream may continue.
49///
50/// For strict decoding that wraps codec errors, use
51/// [`crate::CodecBufferedDecoder`]. Use `BufferedDecodeEngine` directly when
52/// invalid input should be repaired, skipped, counted, or otherwise handled by
53/// policy.
54///
55/// # Example
56///
57/// ```rust
58/// use core::num::NonZeroUsize;
59/// use qubit_codec::{
60///     BufferedDecodeEngine,
61///     BufferedDecodeHooks,
62///     Codec,
63///     DecodeAction,
64///     DecodeContext,
65///     TranscodeStatus,
66/// };
67///
68/// #[derive(Clone, Copy)]
69/// struct ByteCodec;
70///
71/// #[derive(Clone, Copy, Debug, Eq, PartialEq)]
72/// enum ByteDecodeError {
73///     Malformed { consumed: NonZeroUsize },
74/// }
75///
76/// unsafe impl Codec for ByteCodec {
77///     type Value = u8;
78///     type Unit = u8;
79///     type DecodeError = ByteDecodeError;
80///     type EncodeError = core::convert::Infallible;
81///
82///     fn min_units_per_value(&self) -> NonZeroUsize {
83///         NonZeroUsize::MIN
84///     }
85///
86///     fn max_units_per_value(&self) -> NonZeroUsize {
87///         NonZeroUsize::MIN
88///     }
89///
90///     unsafe fn decode_unchecked(
91///         &self,
92///         input: &[u8],
93///         index: usize,
94///     ) -> Result<(u8, NonZeroUsize), Self::DecodeError> {
95///         match input[index] {
96///             0xff => Err(ByteDecodeError::Malformed {
97///                 consumed: NonZeroUsize::MIN,
98///             }),
99///             value => Ok((value, NonZeroUsize::MIN)),
100///         }
101///     }
102///
103///     unsafe fn encode_unchecked(
104///         &self,
105///         value: &u8,
106///         output: &mut [u8],
107///         index: usize,
108///     ) -> Result<usize, Self::EncodeError> {
109///         output[index] = *value;
110///         Ok(1)
111///     }
112/// }
113///
114/// struct ReplacementHooks;
115///
116/// impl BufferedDecodeHooks<ByteCodec> for ReplacementHooks {
117///     type Error = ByteDecodeError;
118///
119///     fn handle_decode_error(
120///         &mut self,
121///         _codec: &ByteCodec,
122///         error: ByteDecodeError,
123///         _context: DecodeContext,
124///     ) -> Result<DecodeAction<u8>, Self::Error> {
125///         match error {
126///             ByteDecodeError::Malformed { consumed } => {
127///                 Ok(DecodeAction::Emit { value: b'?', consumed })
128///             }
129///         }
130///     }
131///
132///     fn invalid_input_index(
133///         &mut self,
134///         _codec: &ByteCodec,
135///         _index: usize,
136///         _input_len: usize,
137///     ) -> Self::Error {
138///         ByteDecodeError::Malformed {
139///             consumed: NonZeroUsize::MIN,
140///         }
141///     }
142///
143///     fn invalid_output_index(
144///         &mut self,
145///         _codec: &ByteCodec,
146///         _index: usize,
147///         _output_len: usize,
148///     ) -> Self::Error {
149///         ByteDecodeError::Malformed {
150///             consumed: NonZeroUsize::MIN,
151///         }
152///     }
153/// }
154///
155/// let mut engine = BufferedDecodeEngine::<_, _>::new(ByteCodec, ReplacementHooks);
156/// let input = [b'a', 0xff, b'b'];
157/// let mut output = [0_u8; 3];
158///
159/// let progress = engine.transcode(&input, 0, &mut output, 0)?;
160/// match progress.status() {
161///     TranscodeStatus::Complete => assert_eq!(&output[..progress.written()], b"a?b"),
162///     TranscodeStatus::NeedInput { input_index, .. } => {
163///         // Keep `input[input_index..]`, append more source units, and resume.
164///     }
165///     TranscodeStatus::NeedOutput { output_index, .. } => {
166///         // Drain `output[..output_index]`, then resume with more output room.
167///     }
168/// }
169/// # Ok::<(), ByteDecodeError>(())
170/// ```
171///
172/// # Type Parameters
173///
174/// - `C`: Low-level codec used by the engine.
175/// - `H`: Policy hook object used by the engine.
176#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
177pub struct BufferedDecodeEngine<C, H> {
178    /// Low-level codec used for one-value decoding.
179    pub(super) codec: C,
180    /// Policy hooks used for decode failures.
181    pub(super) hooks: H,
182}
183
184impl<C, H> BufferedDecodeEngine<C, H>
185where
186    C: Codec,
187    H: BufferedDecodeHooks<C>,
188{
189    /// Creates a buffered decoder engine.
190    ///
191    /// # Parameters
192    ///
193    /// - `codec`: Low-level codec used for one-value decoding.
194    /// - `hooks`: Policy hooks used for decode failures.
195    ///
196    /// # Returns
197    ///
198    /// Returns a buffered decoder engine.
199    #[must_use]
200    #[inline(always)]
201    pub const fn new(codec: C, hooks: H) -> Self {
202        Self { codec, hooks }
203    }
204
205    /// Returns an upper bound for decoded values produced from `input_len` units.
206    ///
207    /// # Parameters
208    ///
209    /// - `input_len`: Number of source units the caller plans to decode.
210    ///
211    /// # Returns
212    ///
213    /// Returns a conservative upper bound, or a capacity error on arithmetic
214    /// overflow.
215    #[must_use = "capacity planning can fail on overflow"]
216    #[inline(always)]
217    pub fn max_output_len(&self, input_len: usize) -> Result<usize, CapacityError> {
218        assert_unit_bounds::<C>(&self.codec);
219        self.hooks.max_output_len(&self.codec, input_len)
220    }
221
222    /// Returns the maximum values emitted by finishing hook-owned state.
223    ///
224    /// # Returns
225    ///
226    /// Returns the hook-provided final output bound.
227    #[must_use]
228    #[inline(always)]
229    pub fn max_finish_output_len(&self) -> usize {
230        self.hooks.max_finish_output_len(&self.codec)
231    }
232
233    /// Resets hook-owned state.
234    ///
235    /// # Parameters
236    ///
237    /// - `self`: Decoder instance whose hook state is reset.
238    ///
239    /// # Returns
240    ///
241    /// Returns unit `()`.
242    #[inline(always)]
243    pub fn reset(&mut self) {
244        self.hooks.reset(&self.codec);
245    }
246
247    /// Decodes source units into caller-provided output values.
248    ///
249    /// # Parameters
250    ///
251    /// - `input`: Complete input unit slice visible to the decoder.
252    /// - `input_index`: Absolute input unit index where decoding starts.
253    /// - `output`: Complete output value slice visible to the decoder.
254    /// - `output_index`: Absolute output value index where writing starts.
255    ///
256    /// # Returns
257    ///
258    /// Returns progress describing input units consumed, output values written,
259    /// and why decoding stopped.
260    ///
261    /// # Errors
262    ///
263    /// Returns hook errors when `input_index` is outside `input`, when
264    /// `output_index` is outside `output`, or when a concrete policy hook
265    /// rejects a value.
266    pub fn transcode(
267        &mut self,
268        input: &[C::Unit],
269        input_index: usize,
270        output: &mut [C::Value],
271        output_index: usize,
272    ) -> Result<TranscodeProgress, H::Error> {
273        if input_index > input.len() {
274            return Err(self.hooks.invalid_input_index(&self.codec, input_index, input.len()));
275        }
276        if output_index > output.len() {
277            return Err(self.hooks.invalid_output_index(&self.codec, output_index, output.len()));
278        }
279        assert_unit_bounds::<C>(&self.codec);
280        let mut state = DecodeState::new(input, input_index, output, output_index);
281
282        while state.has_input() {
283            let context = state.context();
284            let step = self.decode_step(state.input(), context)?;
285            if let Some(progress) = step.apply_to_decode_state(&mut state) {
286                return Ok(progress);
287            }
288        }
289
290        Ok(state.complete_progress())
291    }
292
293    /// Finishes hook-owned output after EOF.
294    ///
295    /// The engine owns no final output state itself. Hook implementations may
296    /// finish their own retained state and emit final output after the caller
297    /// has handled any incomplete input tail. The caller must provide enough
298    /// output capacity for [`BufferedDecodeEngine::max_finish_output_len`].
299    ///
300    /// # Parameters
301    ///
302    /// - `output`: Complete output value slice visible to the decoder.
303    /// - `output_index`: Absolute output value index where writing starts.
304    ///
305    /// # Returns
306    ///
307    /// Returns the number of values written by finalization.
308    ///
309    /// # Errors
310    ///
311    /// Returns [`FinishError`] when the caller provides invalid or insufficient
312    /// output capacity, or when hook finalization fails.
313    ///
314    /// # Panics
315    ///
316    /// Panics when the hook writes or reports more final output values than
317    /// [`BufferedDecodeEngine::max_finish_output_len`] declared.
318    pub fn finish(&mut self, output: &mut [C::Value], output_index: usize) -> Result<usize, FinishError<H::Error>> {
319        let required = self.max_finish_output_len();
320        FinishError::ensure_output_capacity(output.len(), output_index, required)?;
321        let output_end = output_index + required;
322        let output = &mut output[..output_end];
323        let written = self
324            .hooks
325            .finish(&self.codec, output, output_index)
326            .map_err(FinishError::source)?;
327        assert!(
328            written <= required,
329            "BufferedDecodeEngine hook wrote beyond its finish bound",
330        );
331        Ok(written)
332    }
333
334    /// Decodes one value at a caller-proven readable input cursor.
335    ///
336    /// # Safety
337    ///
338    /// The caller must guarantee that at least `codec.min_units_per_value()`
339    /// units are readable from `input_index`.
340    #[inline(always)]
341    pub(crate) unsafe fn decode_unchecked_at(
342        &self,
343        input: &[C::Unit],
344        input_index: usize,
345    ) -> Result<(C::Value, NonZeroUsize), C::DecodeError> {
346        // SAFETY: Forwarded from this method's safety contract.
347        unsafe { self.codec.decode_unchecked(input, input_index) }
348    }
349
350    /// Lets the configured decode hooks classify a low-level decode error.
351    ///
352    /// # Parameters
353    ///
354    /// - `error`: Decode error returned by [`Codec::decode_unchecked`].
355    /// - `context`: Decode context used by policy hooks.
356    ///
357    /// # Returns
358    ///
359    /// Returns the decoded action chosen by policy hooks.
360    ///
361    /// # Errors
362    ///
363    /// Returns a hook-level error when the decode policy rejects the value.
364    #[inline(always)]
365    pub(crate) fn handle_decode_error(
366        &mut self,
367        error: C::DecodeError,
368        context: DecodeContext,
369    ) -> Result<DecodeAction<C::Value>, H::Error> {
370        self.hooks.handle_decode_error(&self.codec, error, context)
371    }
372
373    /// Decodes one source value attempt into a normalized decode step.
374    ///
375    /// # Parameters
376    ///
377    /// - `input`: Complete input unit slice visible to the caller.
378    /// - `context`: Decode context describing the current source and output cursors.
379    ///
380    /// # Returns
381    ///
382    /// Returns one internal decode step, including successful decode, policy
383    /// skip/emit, or need-input state.
384    ///
385    /// # Errors
386    ///
387    /// Returns hook errors when the decode policy rejects the input.
388    #[inline]
389    pub(super) fn decode_step(
390        &mut self,
391        input: &[C::Unit],
392        context: DecodeContext,
393    ) -> Result<DecodeStep<C::Value>, H::Error> {
394        let min_units = self.codec.min_units_per_value().get();
395        if context.available < min_units {
396            let additional = NonZeroUsize::new(min_units - context.available).expect("missing input is non-zero");
397            return Ok(DecodeStep::need_input(additional, context.available));
398        }
399
400        // SAFETY: The context reports at least `min_units_per_value()` source
401        // units available from `context.input_index`.
402        let result = unsafe { self.decode_unchecked_at(input, context.input_index) };
403        self.handle_decode_result(context, result)
404    }
405
406    /// Handles one low-level decode result and returns a normalized decode step.
407    ///
408    /// # Parameters
409    ///
410    /// - `context`: Decode context used by policy hooks.
411    /// - `result`: Low-level codec decode result.
412    ///
413    /// # Returns
414    ///
415    /// Returns the normalized decode step selected by codec success or policy
416    /// hooks.
417    ///
418    /// # Errors
419    ///
420    /// Returns hook errors when the policy rejects the input.
421    #[inline]
422    fn handle_decode_result(
423        &mut self,
424        context: DecodeContext,
425        result: Result<(C::Value, NonZeroUsize), C::DecodeError>,
426    ) -> Result<DecodeStep<C::Value>, H::Error> {
427        match result {
428            Ok((value, consumed)) => {
429                assert!(
430                    consumed.get() <= context.available,
431                    "Codec::decode_unchecked consumed beyond available input",
432                );
433                Ok(DecodeStep::decoded(value, consumed, context.input_index))
434            }
435            Err(error) => {
436                let action = self.handle_decode_error(error, context)?;
437                Ok(action.into_step(context.input_index, context.available))
438            }
439        }
440    }
441}