qubit_io/ext/buf_read_ext.rs
/*******************************************************************************
 *
 * Copyright (c) 2026 Haixing Hu.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0.
 *
 ******************************************************************************/
10use std::io::{
11 BufRead,
12 Error,
13 ErrorKind,
14 Result,
15};
16use std::string::FromUtf8Error;
17
18use crate::util::{
19 try_reserve_string,
20 try_reserve_vec,
21};
22
/// Extension methods for [`BufRead`] values.
///
/// `BufReadExt` adds bounded, delimiter-oriented reads. They are intended for
/// line-based and delimiter-based formats where accepting unbounded input
/// would expose a parser to excessive memory use.
///
/// Every method takes a `max_len` budget measured in bytes. The delimiter,
/// when present, counts toward that budget.
pub trait BufReadExt: BufRead {
    /// Reads bytes through `delimiter` while enforcing `max_len`.
    ///
    /// The returned vector includes the delimiter when it is found. EOF before
    /// the delimiter is accepted as long as the accumulated bytes do not exceed
    /// `max_len`. If the limit is exceeded, the accepted prefix may already
    /// have been consumed from the reader; it is not returned to the caller.
    ///
    /// # Parameters
    /// - `delimiter`: Delimiter byte to search for.
    /// - `max_len`: Maximum accepted result length, including the delimiter.
    ///
    /// # Returns
    /// Bytes read from the stream. The vector is empty when the reader is
    /// already at EOF.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when more than `max_len` bytes are
    /// required before reaching `delimiter` or EOF. Propagates I/O errors
    /// reported by the underlying reader.
    fn read_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<Vec<u8>>;

    /// Reads bytes through `delimiter` into `output` while enforcing `max_len`.
    ///
    /// This method appends at most `max_len` bytes from the current reader
    /// position to `output`; existing contents of `output` are preserved. The
    /// delimiter is included when it is found. If the limit is exceeded, the
    /// accepted prefix may already have been appended to `output` and consumed
    /// from the reader.
    ///
    /// # Parameters
    /// - `delimiter`: Delimiter byte to search for.
    /// - `output`: Destination vector to append to.
    /// - `max_len`: Maximum accepted result length, including the delimiter.
    ///
    /// # Returns
    /// Number of bytes appended to `output`.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when more than `max_len` bytes are
    /// required before reaching `delimiter` or EOF. Propagates I/O errors
    /// reported by the underlying reader.
    fn read_until_limited_into(&mut self, delimiter: u8, output: &mut Vec<u8>, max_len: usize) -> Result<usize>;

    /// Reads one UTF-8 line while enforcing `max_len`.
    ///
    /// The returned string includes the trailing `\n` when it is present. EOF
    /// before a newline is accepted as long as the accumulated bytes do not
    /// exceed `max_len`.
    ///
    /// # Parameters
    /// - `max_len`: Maximum accepted line length in bytes, including `\n`.
    ///
    /// # Returns
    /// The decoded UTF-8 line.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when the line exceeds `max_len` or is
    /// not valid UTF-8. Propagates I/O errors reported by the underlying
    /// reader.
    fn read_line_limited(&mut self, max_len: usize) -> Result<String>;

    /// Reads one UTF-8 line into `output` while enforcing `max_len`.
    ///
    /// This method reads at most `max_len` bytes, validates the line as UTF-8,
    /// and appends it to `output`. If the line is oversized or invalid UTF-8,
    /// `output` is left unchanged. The bytes examined while detecting either
    /// failure may already have been consumed from the reader.
    ///
    /// # Parameters
    /// - `output`: Destination string to append to.
    /// - `max_len`: Maximum accepted line length in bytes, including `\n`.
    ///
    /// # Returns
    /// Number of bytes appended to `output`.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when the line exceeds `max_len` or is
    /// not valid UTF-8. Propagates I/O errors reported by the underlying
    /// reader.
    fn read_line_limited_into(&mut self, output: &mut String, max_len: usize) -> Result<usize>;

    /// Discards bytes through `delimiter` while enforcing `max_len`.
    ///
    /// The delimiter is consumed when it is found. EOF before the delimiter is
    /// accepted as long as no more than `max_len` bytes are consumed. If the
    /// limit is exceeded, the accepted prefix may already have been consumed.
    ///
    /// # Parameters
    /// - `delimiter`: Delimiter byte to search for.
    /// - `max_len`: Maximum number of bytes to discard, including the
    ///   delimiter.
    ///
    /// # Returns
    /// Number of bytes discarded.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when more than `max_len` bytes are
    /// required before reaching `delimiter` or EOF. Propagates I/O errors
    /// reported by the underlying reader.
    fn discard_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<usize>;
}
127
128impl<T> BufReadExt for T
129where
130 T: BufRead + ?Sized,
131{
132 #[inline]
133 fn read_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<Vec<u8>> {
134 read_until_limited_impl(self, delimiter, max_len)
135 }
136
137 #[inline]
138 fn read_until_limited_into(&mut self, delimiter: u8, output: &mut Vec<u8>, max_len: usize) -> Result<usize> {
139 read_until_limited_into_impl(self, delimiter, output, max_len)
140 }
141
142 #[inline]
143 fn read_line_limited(&mut self, max_len: usize) -> Result<String> {
144 read_line_limited_impl(self, max_len)
145 }
146
147 #[inline]
148 fn read_line_limited_into(&mut self, output: &mut String, max_len: usize) -> Result<usize> {
149 read_line_limited_into_impl(self, output, max_len)
150 }
151
152 #[inline]
153 fn discard_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<usize> {
154 discard_until_limited_impl(self, delimiter, max_len)
155 }
156}
157
158/// Reads bytes through `delimiter` with a maximum result size.
159///
160/// # Parameters
161/// - `reader`: Buffered source reader.
162/// - `delimiter`: Delimiter byte to search for.
163/// - `max_len`: Maximum accepted result length.
164///
165/// # Returns
166/// Bytes read from the stream.
167///
168/// # Errors
169/// Returns an invalid-data error when the limit is exceeded, or an I/O error
170/// from `reader`.
171fn read_until_limited_impl<T>(reader: &mut T, delimiter: u8, max_len: usize) -> Result<Vec<u8>>
172where
173 T: BufRead + ?Sized,
174{
175 let mut output = Vec::new();
176 try_reserve_vec(&mut output, max_len.min(8192))?;
177 read_until_limited_into_impl(reader, delimiter, &mut output, max_len)?;
178 Ok(output)
179}
180
181/// Reads bytes through `delimiter` into `output` with a maximum result size.
182///
183/// # Parameters
184/// - `reader`: Buffered source reader.
185/// - `delimiter`: Delimiter byte to search for.
186/// - `output`: Destination vector to append to.
187/// - `max_len`: Maximum accepted result length.
188///
189/// # Returns
190/// Number of bytes appended to `output`.
191///
192/// # Errors
193/// Returns an invalid-data error when the limit is exceeded, or an I/O error
194/// from `reader`.
195fn read_until_limited_into_impl<T>(reader: &mut T, delimiter: u8, output: &mut Vec<u8>, max_len: usize) -> Result<usize>
196where
197 T: BufRead + ?Sized,
198{
199 let mut appended = 0;
200 loop {
201 let available = reader.fill_buf()?;
202 if available.is_empty() {
203 return Ok(appended);
204 }
205
206 let delimiter_position = available.iter().position(|byte| *byte == delimiter);
207 let requested = delimiter_position.map_or(available.len(), |position| position + 1);
208 let remaining = max_len.saturating_sub(appended);
209 if requested > remaining {
210 if remaining > 0 {
211 try_reserve_vec(output, remaining)?;
212 output.extend_from_slice(&available[..remaining]);
213 reader.consume(remaining);
214 }
215 return Err(limit_exceeded_error(max_len, delimiter));
216 }
217
218 try_reserve_vec(output, requested)?;
219 output.extend_from_slice(&available[..requested]);
220 reader.consume(requested);
221 appended += requested;
222 if delimiter_position.is_some() {
223 return Ok(appended);
224 }
225 }
226}
227
228/// Reads one UTF-8 line with a maximum byte length.
229///
230/// # Parameters
231/// - `reader`: Buffered source reader.
232/// - `max_len`: Maximum accepted line length in bytes.
233///
234/// # Returns
235/// Decoded line.
236///
237/// # Errors
238/// Returns an invalid-data error when the line exceeds the limit or is not
239/// valid UTF-8, or an I/O error from `reader`.
240fn read_line_limited_impl<T>(reader: &mut T, max_len: usize) -> Result<String>
241where
242 T: BufRead + ?Sized,
243{
244 let mut output = String::new();
245 read_line_limited_into_impl(reader, &mut output, max_len)?;
246 Ok(output)
247}
248
249/// Reads one UTF-8 line into `output` with a maximum byte length.
250///
251/// # Parameters
252/// - `reader`: Buffered source reader.
253/// - `output`: Destination string to append to.
254/// - `max_len`: Maximum accepted line length in bytes.
255///
256/// # Returns
257/// Number of bytes appended to `output`.
258///
259/// # Errors
260/// Returns an invalid-data error when the line exceeds the limit or is not
261/// valid UTF-8, or an I/O error from `reader`.
262fn read_line_limited_into_impl<T>(reader: &mut T, output: &mut String, max_len: usize) -> Result<usize>
263where
264 T: BufRead + ?Sized,
265{
266 let mut bytes = Vec::new();
267 try_reserve_vec(&mut bytes, max_len.min(8192))?;
268 let count = read_until_limited_into_impl(reader, b'\n', &mut bytes, max_len)?;
269 let line = String::from_utf8(bytes).map_err(invalid_utf8_error)?;
270 try_reserve_string(output, line.len())?;
271 output.push_str(&line);
272 Ok(count)
273}
274
275/// Discards bytes through `delimiter` with a maximum consumed size.
276///
277/// # Parameters
278/// - `reader`: Buffered source reader.
279/// - `delimiter`: Delimiter byte to search for.
280/// - `max_len`: Maximum accepted discard length.
281///
282/// # Returns
283/// Number of discarded bytes.
284///
285/// # Errors
286/// Returns an invalid-data error when the limit is exceeded, or an I/O error
287/// from `reader`.
288fn discard_until_limited_impl<T>(reader: &mut T, delimiter: u8, max_len: usize) -> Result<usize>
289where
290 T: BufRead + ?Sized,
291{
292 let mut discarded = 0;
293 loop {
294 let available = reader.fill_buf()?;
295 if available.is_empty() {
296 return Ok(discarded);
297 }
298
299 let delimiter_position = available.iter().position(|byte| *byte == delimiter);
300 let requested = delimiter_position.map_or(available.len(), |position| position + 1);
301 let remaining = max_len.saturating_sub(discarded);
302 if requested > remaining {
303 if remaining > 0 {
304 reader.consume(remaining);
305 }
306 return Err(limit_exceeded_error(max_len, delimiter));
307 }
308
309 reader.consume(requested);
310 discarded += requested;
311 if delimiter_position.is_some() {
312 return Ok(discarded);
313 }
314 }
315}
316
/// Builds an invalid-data error for delimiter reads that exceed their limit.
///
/// The delimiter is rendered as its decimal byte value in the message (for
/// example `10` for `\n`).
///
/// # Parameters
/// - `max_len`: Maximum accepted byte length.
/// - `delimiter`: Delimiter byte searched by the caller.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error.
fn limit_exceeded_error(max_len: usize, delimiter: u8) -> Error {
    let message = format!("input exceeds maximum length of {max_len} bytes before delimiter {delimiter}");
    Error::new(ErrorKind::InvalidData, message)
}
331
/// Converts an invalid UTF-8 line error into an I/O error.
///
/// The UTF-8 error is embedded in the message through its `Display` output.
///
/// # Parameters
/// - `error`: UTF-8 conversion error.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error containing the UTF-8 error context.
fn invalid_utf8_error(error: FromUtf8Error) -> Error {
    let message = format!("limited line is not valid UTF-8: {error}");
    Error::new(ErrorKind::InvalidData, message)
}