Skip to main content

amaru_minicbor_extra/decode/
lazy.rs

1// Copyright 2025 PRAGMA
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::{fs, io::Read};
16
17use minicbor as cbor;
18
/// Incremental CBOR decoder backed by a [`std::io::Read`] source.
///
/// Rather than loading the whole input upfront, bytes are pulled from the source chunk by chunk
/// (see `CHUNK_SIZE`), which helps keeping memory usage low when decoding large files.
///
/// Two pieces of state are maintained: a handle to the source, and a buffer of the bytes that
/// have already been read from it but not yet consumed by a decoding step.
///
/// Use [`Self::from_file`] to lazily read and decode from a (large) file.
pub struct LazyDecoder<'a> {
    /// Source from which additional bytes are pulled on demand.
    reader: &'a mut dyn Read,
    /// Bytes already read from `reader` that no decoding step has consumed yet.
    bytes: Vec<u8>,
}
30
31impl<'a> LazyDecoder<'a> {
32    const CHUNK_SIZE: usize = 2 * 1024 * 1024; // 2MiB, chosen at random by fair dice roll
33
34    pub fn from_file(file: &'a mut fs::File) -> Self {
35        Self { reader: file, bytes: Vec::with_capacity(Self::CHUNK_SIZE) }
36    }
37
38    /// Consumes enough bytes and skip the next CBOR element.
39    pub fn skip(&mut self) -> Result<(), Box<dyn std::error::Error>> {
40        self.with_decoder(|d| Ok(d.skip()?))
41    }
42
43    /// Consumes enough bytes and decode the next CBOR element.
44    pub fn decode<T: for<'d> cbor::decode::Decode<'d, ()>>(&mut self) -> Result<T, Box<dyn std::error::Error>> {
45        self.with_decoder(|d| Ok(d.decode()?))
46    }
47
48    /// Decode some element according to a custom strategy. This consumes more bytes if the decoder
49    /// fails due to a lack of bytes. And error otherwise.
50    pub fn with_decoder<T>(
51        &mut self,
52        decode: impl Fn(&mut cbor::decode::Decoder<'_>) -> Result<T, Box<dyn std::error::Error>>,
53    ) -> Result<T, Box<dyn std::error::Error>> {
54        let mut should_read_more = self.bytes.is_empty();
55        let mut can_read_more = true;
56        loop {
57            if should_read_more {
58                let mut buf = [0; Self::CHUNK_SIZE];
59                let read = self.reader.read(&mut buf).map_err(cbor::decode::Error::custom)?;
60                self.bytes.extend_from_slice(&buf);
61                can_read_more = read > 0;
62            }
63
64            let mut d = cbor::Decoder::new(&self.bytes);
65
66            match decode(&mut d) {
67                Ok(value) => {
68                    #[cfg(feature = "tracing")]
69                    if self.bytes.len() > 100 * Self::CHUNK_SIZE {
70                        tracing::warn!(
71                            target = std::any::type_name::<T>(),
72                            chunk_size = self.bytes.len(),
73                            hint = "consider decoding incrementally and/or in smaller chunks",
74                            "decoding large chunk"
75                        );
76                    }
77                    self.bytes = Vec::from(&self.bytes[d.position()..]);
78                    return Ok(value);
79                }
80                Err(err) if can_read_more => match err.downcast::<cbor::decode::Error>() {
81                    Ok(err) if err.is_end_of_input() => {
82                        should_read_more = true;
83                        continue;
84                    }
85                    Ok(err) => return Err(err),
86                    Err(err) => return Err(err),
87                },
88                Err(err) => return Err(err),
89            }
90        }
91    }
92}