amaru_minicbor_extra/decode/lazy.rs
1// Copyright 2025 PRAGMA
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::{fs, io::Read};
16
17use minicbor as cbor;
18
/// Incremental CBOR decoder for inputs that are too large to be held in memory at once.
///
/// Instead of loading its whole input up front, the decoder pulls bytes from an underlying
/// [`std::io::Read`] source in chunks of at most `CHUNK_SIZE` bytes, and only does so when the
/// bytes it already holds are not enough to decode the next element.
///
/// Internally it keeps two things: the bytes that were read but not yet consumed by a successful
/// decode, and a mutable borrow of the source to read from.
///
/// See [`Self::from_file`] for example, to lazily read and decode from a (large) file.
pub struct LazyDecoder<'a> {
    /// Bytes already pulled from `reader` that no successful decode has consumed yet.
    bytes: Vec<u8>,
    /// Borrowed source from which additional bytes are pulled on demand.
    reader: &'a mut dyn Read,
}
30
31impl<'a> LazyDecoder<'a> {
32 const CHUNK_SIZE: usize = 2 * 1024 * 1024; // 2MiB, chosen at random by fair dice roll
33
34 pub fn from_file(file: &'a mut fs::File) -> Self {
35 Self { reader: file, bytes: Vec::with_capacity(Self::CHUNK_SIZE) }
36 }
37
38 /// Consumes enough bytes and skip the next CBOR element.
39 pub fn skip(&mut self) -> Result<(), Box<dyn std::error::Error>> {
40 self.with_decoder(|d| Ok(d.skip()?))
41 }
42
43 /// Consumes enough bytes and decode the next CBOR element.
44 pub fn decode<T: for<'d> cbor::decode::Decode<'d, ()>>(&mut self) -> Result<T, Box<dyn std::error::Error>> {
45 self.with_decoder(|d| Ok(d.decode()?))
46 }
47
48 /// Decode some element according to a custom strategy. This consumes more bytes if the decoder
49 /// fails due to a lack of bytes. And error otherwise.
50 pub fn with_decoder<T>(
51 &mut self,
52 decode: impl Fn(&mut cbor::decode::Decoder<'_>) -> Result<T, Box<dyn std::error::Error>>,
53 ) -> Result<T, Box<dyn std::error::Error>> {
54 let mut should_read_more = self.bytes.is_empty();
55 let mut can_read_more = true;
56 loop {
57 if should_read_more {
58 let mut buf = [0; Self::CHUNK_SIZE];
59 let read = self.reader.read(&mut buf).map_err(cbor::decode::Error::custom)?;
60 self.bytes.extend_from_slice(&buf);
61 can_read_more = read > 0;
62 }
63
64 let mut d = cbor::Decoder::new(&self.bytes);
65
66 match decode(&mut d) {
67 Ok(value) => {
68 #[cfg(feature = "tracing")]
69 if self.bytes.len() > 100 * Self::CHUNK_SIZE {
70 tracing::warn!(
71 target = std::any::type_name::<T>(),
72 chunk_size = self.bytes.len(),
73 hint = "consider decoding incrementally and/or in smaller chunks",
74 "decoding large chunk"
75 );
76 }
77 self.bytes = Vec::from(&self.bytes[d.position()..]);
78 return Ok(value);
79 }
80 Err(err) if can_read_more => match err.downcast::<cbor::decode::Error>() {
81 Ok(err) if err.is_end_of_input() => {
82 should_read_more = true;
83 continue;
84 }
85 Ok(err) => return Err(err),
86 Err(err) => return Err(err),
87 },
88 Err(err) => return Err(err),
89 }
90 }
91 }
92}