Skip to main content

qubit_codec/buffered/
buffered_transcoder.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10use super::{
11    capacity_error::CapacityError,
12    finish_error::FinishError,
13    transcode_progress::TranscodeProgress,
14};
15
16/// Converts one logical stream of input units into one logical stream of output units.
17///
18/// `transcode` is the main streaming API. It transforms a provided input
19/// segment and writes as much output as available buffer space allows.
20///
21/// A transcoder instance has a simple lifecycle:
22///
23/// 1. A newly created or reset instance is ready for a new logical stream.
24/// 2. Call [`BufferedTranscoder::transcode`] zero or more times while input is available.
25/// 3. Preserve any tail reported by [`crate::TranscodeStatus::NeedInput`] in
26///    the caller-owned input buffer.
27/// 4. Call [`BufferedTranscoder::finish`] after the caller knows no more input remains
28///    and has handled any incomplete tail. Size this final output with
29///    [`BufferedTranscoder::max_finish_output_len`].
30/// 5. After [`BufferedTranscoder::finish`] succeeds, call [`BufferedTranscoder::reset`] before
31///    starting another logical stream with the same instance.
32///
33/// The method is suitable for:
34/// - pull-style consumers that call conversion repeatedly as buffers arrive;
35/// - bounded output sinks that use `NeedOutput` progress during `transcode`;
36/// - stateless and stateful codecs that all return progress-oriented stopping
37///   reasons.
38///
39/// `BufferedTranscoder` is intentionally independent from any charset semantics:
40///
41/// - Use `BufferedTranscoder` directly for custom, policy-free unit transforms.
42/// - Use `BufferedTranscoder` when you want to own malformed/unmappable decisions at the call site.
43///
44/// # Example: streaming byte-to-word decoder
45///
46/// ```rust
47/// use core::num::NonZeroUsize;
48/// use qubit_codec::{BufferedTranscoder, TranscodeProgress, TranscodeStatus};
49///
50/// #[derive(Default)]
51/// struct U16BeBytesDecoder;
52///
53/// #[derive(Debug, Eq, PartialEq)]
54/// enum U16BeBytesDecodeError {
55///     InvalidInputIndex,
56///     InvalidOutputIndex,
57/// }
58///
59/// impl BufferedTranscoder<u8, u16> for U16BeBytesDecoder {
60///     type Error = U16BeBytesDecodeError;
61///
62///     fn max_output_len(&self, input_len: usize) -> Result<usize, qubit_codec::CapacityError> {
63///         Ok(input_len / 2)
64///     }
65///
66///     fn transcode(
67///         &mut self,
68///         input: &[u8],
69///         input_index: usize,
70///         output: &mut [u16],
71///         output_index: usize,
72///     ) -> Result<TranscodeProgress, Self::Error> {
73///         if input_index > input.len() {
74///             return Err(U16BeBytesDecodeError::InvalidInputIndex);
75///         }
76///         if output_index > output.len() {
77///             return Err(U16BeBytesDecodeError::InvalidOutputIndex);
78///         }
79///
80///         let mut read = 0;
81///         let mut written = 0;
82///         while input_index + read + 1 < input.len() {
83///             if output_index + written == output.len() {
84///                 let status = TranscodeStatus::NeedOutput {
85///                     output_index: output_index + written,
86///                     additional: NonZeroUsize::MIN,
87///                     available: 0,
88///                 };
89///                 return Ok(TranscodeProgress::new(status, read, written));
90///             }
91///             let high = input[input_index + read] as u16;
92///             let low = input[input_index + read + 1] as u16;
93///             output[output_index + written] = (high << 8) | low;
94///             read += 2;
95///             written += 1;
96///         }
97///         if input_index + read == input.len() {
98///             Ok(TranscodeProgress::complete(read, written))
99///         } else {
100///             let available = input.len() - (input_index + read);
101///             let status = TranscodeStatus::NeedInput {
102///                 input_index: input_index + read,
103///                 additional: NonZeroUsize::new(2 - available).expect("missing input is non-zero"),
104///                 available,
105///             };
106///             Ok(TranscodeProgress::new(status, read, written))
107///         }
108///     }
109/// }
110///
111/// let mut transcoder = U16BeBytesDecoder;
112/// let mut output = [0_u16; 1];
113/// let progress = transcoder
114///     .transcode(&[0x12, 0x34, 0xab, 0xcd], 0, &mut output, 0)
115///     .expect("decoding cannot fail");
116/// assert_eq!(TranscodeStatus::NeedOutput {
117///     output_index: 1,
118///     additional: NonZeroUsize::MIN,
119///     available: 0,
120/// }, progress.status());
121/// assert_eq!(2, progress.read());
122/// assert_eq!(1, progress.written());
123/// assert_eq!([0x1234], output);
124///
125/// let mut output = [0_u16; 2];
126/// let progress = transcoder
127///     .transcode(&[0x12, 0x34, 0xab], 0, &mut output, 0)
128///     .expect("decoding cannot fail");
129/// assert_eq!(TranscodeStatus::NeedInput {
130///     input_index: 2,
131///     additional: NonZeroUsize::MIN,
132///     available: 1,
133/// }, progress.status());
134/// assert_eq!(2, progress.read());
135/// assert_eq!(1, progress.written());
136/// assert_eq!([0x1234, 0], output);
137///
138/// assert!(matches!(
139///     transcoder.transcode(&[0x12], 2, &mut output, 0),
140///     Err(U16BeBytesDecodeError::InvalidInputIndex),
141/// ));
142/// assert!(matches!(
143///     transcoder.transcode(&[0x12], 0, &mut output, 3),
144///     Err(U16BeBytesDecodeError::InvalidOutputIndex),
145/// ));
146/// ```
147///
148/// The trait is intentionally independent from charset concepts. Implementors
149/// use `input_index` and `output_index` as absolute positions in the supplied
150/// slices. Returned progress counters are relative counts from those positions.
151/// For raw codecs this gives a compact API; higher-level workflows can wrap this
152/// trait with their own semantic policies.
153///
154/// # Type Parameters
155///
156/// - `Input`: Input unit type accepted by this transcoder.
157/// - `Output`: Output unit type produced by this transcoder.
158pub trait BufferedTranscoder<Input, Output> {
159    /// Error reported for semantic conversion failures.
160    type Error;
161
162    /// Returns an upper bound for output units produced from `input_len` units.
163    ///
164    /// For stateful transcoders, this bound is evaluated against the current
165    /// instance state and must include any already-retained output that may be
166    /// emitted before or alongside output derived from the supplied input.
167    ///
168    /// # Parameters
169    ///
170    /// - `input_len`: Number of input units the caller plans to transcode.
171    ///
172    /// # Returns
173    ///
174    /// Returns `Ok(bound)` when the upper bound can be represented as `usize`.
175    /// Returns [`CapacityError::OutputLengthOverflow`] when capacity arithmetic
176    /// overflows.
177    #[must_use = "capacity planning can fail on overflow"]
178    fn max_output_len(&self, input_len: usize) -> Result<usize, CapacityError>;
179
180    /// Returns an upper bound for output units produced by stream finalization.
181    ///
182    /// This bound is evaluated against the transcoder's current state. It does
183    /// not include output that may be produced by future [`BufferedTranscoder::transcode`]
184    /// calls. Use it before [`BufferedTranscoder::finish`] when the caller wants to size
185    /// a final output buffer for the already supplied input.
186    ///
187    /// # Returns
188    ///
189    /// Returns `Ok(bound)` when the upper bound can be represented as `usize`.
190    /// Returns [`CapacityError::OutputLengthOverflow`] when capacity arithmetic
191    /// overflows. Stateless transcoders default to `Ok(0)`.
192    #[must_use = "capacity planning can fail on overflow"]
193    #[inline(always)]
194    fn max_finish_output_len(&self) -> Result<usize, CapacityError> {
195        Ok(0)
196    }
197
198    /// Resets state retained between conversion calls.
199    ///
200    /// This starts a new logical stream while keeping configuration such as
201    /// byte order, charset policy, replacement values, and cryptographic keys.
202    /// Pending input, pending output, and completed-stream state must be
203    /// discarded by stateful implementations. Stateless transcoders may keep
204    /// the default no-op implementation.
205    ///
206    /// # Returns
207    ///
208    /// Returns unit `()`.
209    #[inline(always)]
210    fn reset(&mut self) {}
211
212    /// Converts available input units into output units.
213    ///
214    /// This method processes an input segment without closing the logical input
215    /// stream. When the current segment ends in a partial value, the transcoder
216    /// reports [`crate::TranscodeStatus::NeedInput`] without consuming that
217    /// tail. The caller owns input-buffer refill and EOF incomplete-tail policy.
218    ///
219    /// # Parameters
220    ///
221    /// - `input`: Complete input unit slice visible to the transcoder.
222    /// - `input_index`: Absolute input unit index where conversion starts.
223    /// - `output`: Complete output unit slice visible to the transcoder.
224    /// - `output_index`: Absolute output unit index where writing starts.
225    ///
226    /// # Returns
227    ///
228    /// Returns progress describing how many units were consumed and produced and
229    /// why conversion stopped.
230    ///
231    /// # Errors
232    ///
233    /// Returns `Self::Error` for semantic conversion failures that the transcoder's
234    /// policy does not absorb, including caller-supplied `input_index` or
235    /// `output_index` values outside their corresponding slices.
236    fn transcode(
237        &mut self,
238        input: &[Input],
239        input_index: usize,
240        output: &mut [Output],
241        output_index: usize,
242    ) -> Result<TranscodeProgress, Self::Error>;
243
244    /// Finishes internally retained output after all input has been supplied.
245    ///
246    /// `transcode` handles ordinary input consumption. `finish` is called once
247    /// after the caller knows no more input remains and has handled any
248    /// incomplete input tail reported by `transcode`. It emits final output
249    /// derived from internal state, such as reset bytes, checksums, digests, or
250    /// trailers. The caller must provide enough output capacity for
251    /// [`BufferedTranscoder::max_finish_output_len`].
252    ///
253    /// After `finish` succeeds, the logical stream is closed. Portable callers
254    /// should call [`BufferedTranscoder::reset`] before passing input for another
255    /// logical stream to the same instance.
256    ///
257    /// # Example
258    ///
259    /// ```rust
260    /// use core::num::NonZeroUsize;
261    /// use qubit_codec::{BufferedTranscoder, TranscodeStatus};
262    ///
263    /// #[derive(Default)]
264    /// struct ByteCopy;
265    ///
266    /// impl BufferedTranscoder<u8, u8> for ByteCopy {
267    ///     type Error = core::convert::Infallible;
268    ///
269    ///     fn max_output_len(&self, input_len: usize) -> Result<usize, qubit_codec::CapacityError> {
270    ///         Ok(input_len)
271    ///     }
272    ///
273    ///     fn transcode(
274    ///         &mut self,
275    ///         input: &[u8],
276    ///         input_index: usize,
277    ///         output: &mut [u8],
278    ///         output_index: usize,
279    ///     ) -> Result<qubit_codec::TranscodeProgress, Self::Error> {
280    ///         let mut read = 0;
281    ///         let mut written = 0;
282    ///         while input_index + read < input.len() && output_index + written < output.len() {
283    ///             output[output_index + written] = input[input_index + read];
284    ///             read += 1;
285    ///             written += 1;
286    ///         }
287    ///         if input_index + read == input.len() {
288    ///             Ok(qubit_codec::TranscodeProgress::complete(read, written))
289    ///         } else {
290    ///             let status = qubit_codec::TranscodeStatus::NeedOutput {
291    ///                 output_index: output_index + written,
292    ///                 additional: NonZeroUsize::MIN,
293    ///                 available: output.len().saturating_sub(output_index + written),
294    ///             };
295    ///             Ok(qubit_codec::TranscodeProgress::new(
296    ///                 status,
297    ///                 read,
298    ///                 written,
299    ///             ))
300    ///         }
301    ///     }
302    /// }
303    ///
304    /// let mut transcoder = ByteCopy;
305    /// let mut output = [1_u8; 1];
306    /// let progress = transcoder
307    ///     .transcode(&[7], 0, &mut output, 0)
308    ///     .expect("writer consumes one unit");
309    /// assert_eq!(TranscodeStatus::Complete, progress.status());
310    ///
311    /// let written = transcoder
312    ///     .finish(&mut output, 1)
313    ///     .expect("finish does not emit final state for no-op transcoders");
314    /// assert_eq!(0, written);
315    /// ```
316    ///
317    /// # Parameters
318    ///
319    /// - `output`: Complete output unit slice visible to the transcoder.
320    /// - `output_index`: Absolute output unit index where writing starts.
321    ///
322    /// # Returns
323    ///
324    /// Returns the number of units written during finalization. Stateless
325    /// transcoders return `0`.
326    ///
327    /// # Errors
328    ///
329    /// Returns [`FinishError`] when `output_index` is invalid, when output
330    /// capacity is insufficient, or when internal state cannot be finished
331    /// according to the transcoder's policy.
332    #[inline]
333    fn finish(&mut self, output: &mut [Output], output_index: usize) -> Result<usize, FinishError<Self::Error>> {
334        if output_index > output.len() {
335            return Err(FinishError::invalid_output_index(output_index, output.len()));
336        }
337        Ok(0)
338    }
339}