Skip to main content

qubit_io/ext/
buf_read_ext.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10use std::io::{
11    BufRead,
12    Error,
13    ErrorKind,
14    Result,
15};
16use std::string::FromUtf8Error;
17
18use crate::util::{
19    try_reserve_string,
20    try_reserve_vec,
21};
22
/// Bounded, delimiter-oriented reading helpers for any [`BufRead`] source.
///
/// Every method takes an explicit byte budget (`max_len`), so parsers for
/// line-based or delimiter-based formats never have to buffer an unbounded
/// amount of input just to find the next record boundary.
pub trait BufReadExt: BufRead {
    /// Collects bytes up to and including `delimiter` into a new vector.
    ///
    /// Hitting EOF before the delimiter is not an error, provided the bytes
    /// gathered so far fit within `max_len`. When the budget is exceeded, the
    /// part of the input that did fit may already have been consumed from the
    /// reader by the time the error is returned.
    ///
    /// # Parameters
    /// - `delimiter`: Byte that terminates the read.
    /// - `max_len`: Largest result size that is accepted, counting the
    ///   delimiter itself.
    ///
    /// # Returns
    /// The bytes taken from the stream, ending with `delimiter` when it was
    /// found.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when reaching `delimiter` or EOF
    /// would need more than `max_len` bytes. Fails with an I/O error when the
    /// underlying reader fails.
    fn read_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<Vec<u8>>;

    /// Appends bytes up to and including `delimiter` to `output`.
    ///
    /// No more than `max_len` bytes, counted from the current reader position,
    /// are ever appended. When the budget is exceeded, the part of the input
    /// that did fit may already have been appended to `output` and consumed
    /// from the reader.
    ///
    /// # Parameters
    /// - `delimiter`: Byte that terminates the read.
    /// - `output`: Vector that receives the bytes; existing content is kept.
    /// - `max_len`: Largest number of bytes that is accepted, counting the
    ///   delimiter itself.
    ///
    /// # Returns
    /// How many bytes were appended to `output`.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when reaching `delimiter` or EOF
    /// would need more than `max_len` bytes. Fails with an I/O error when the
    /// underlying reader fails.
    fn read_until_limited_into(
        &mut self,
        delimiter: u8,
        output: &mut Vec<u8>,
        max_len: usize,
    ) -> Result<usize>;

    /// Reads a single UTF-8 line into a new string.
    ///
    /// A trailing `\n` is kept in the result when the line has one. A final
    /// line that ends at EOF without a newline is accepted as long as it fits
    /// within `max_len`.
    ///
    /// # Parameters
    /// - `max_len`: Largest accepted line size in bytes, counting the `\n`.
    ///
    /// # Returns
    /// The line decoded as UTF-8.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when the line is longer than
    /// `max_len` or is not valid UTF-8. Fails with an I/O error when the
    /// underlying reader fails.
    fn read_line_limited(&mut self, max_len: usize) -> Result<String>;

    /// Reads a single UTF-8 line and appends it to `output`.
    ///
    /// At most `max_len` bytes are read and validated as UTF-8 before anything
    /// is appended, so `output` is untouched when the line is too long or is
    /// not valid UTF-8. An oversized line may nevertheless have had its
    /// accepted prefix consumed from the reader while the violation was being
    /// detected.
    ///
    /// # Parameters
    /// - `output`: String that receives the line; existing content is kept.
    /// - `max_len`: Largest accepted line size in bytes, counting the `\n`.
    ///
    /// # Returns
    /// How many bytes were appended to `output`.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when the line is longer than
    /// `max_len` or is not valid UTF-8. Fails with an I/O error when the
    /// underlying reader fails.
    fn read_line_limited_into(&mut self, output: &mut String, max_len: usize) -> Result<usize>;

    /// Skips bytes up to and including `delimiter` without storing them.
    ///
    /// The delimiter itself is consumed when it is found. Hitting EOF before
    /// the delimiter is not an error, provided no more than `max_len` bytes
    /// were consumed.
    ///
    /// # Parameters
    /// - `delimiter`: Byte that terminates the skip.
    /// - `max_len`: Largest number of bytes that may be skipped, counting the
    ///   delimiter itself.
    ///
    /// # Returns
    /// How many bytes were skipped.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when reaching `delimiter` or EOF
    /// would need more than `max_len` bytes. Fails with an I/O error when the
    /// underlying reader fails.
    fn discard_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<usize>;
}
127
128impl<T> BufReadExt for T
129where
130    T: BufRead + ?Sized,
131{
132    #[inline]
133    fn read_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<Vec<u8>> {
134        read_until_limited_impl(self, delimiter, max_len)
135    }
136
137    #[inline]
138    fn read_until_limited_into(&mut self, delimiter: u8, output: &mut Vec<u8>, max_len: usize) -> Result<usize> {
139        read_until_limited_into_impl(self, delimiter, output, max_len)
140    }
141
142    #[inline]
143    fn read_line_limited(&mut self, max_len: usize) -> Result<String> {
144        read_line_limited_impl(self, max_len)
145    }
146
147    #[inline]
148    fn read_line_limited_into(&mut self, output: &mut String, max_len: usize) -> Result<usize> {
149        read_line_limited_into_impl(self, output, max_len)
150    }
151
152    #[inline]
153    fn discard_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<usize> {
154        discard_until_limited_impl(self, delimiter, max_len)
155    }
156}
157
158/// Reads bytes through `delimiter` with a maximum result size.
159///
160/// # Parameters
161/// - `reader`: Buffered source reader.
162/// - `delimiter`: Delimiter byte to search for.
163/// - `max_len`: Maximum accepted result length.
164///
165/// # Returns
166/// Bytes read from the stream.
167///
168/// # Errors
169/// Returns an invalid-data error when the limit is exceeded, or an I/O error
170/// from `reader`.
171fn read_until_limited_impl<T>(reader: &mut T, delimiter: u8, max_len: usize) -> Result<Vec<u8>>
172where
173    T: BufRead + ?Sized,
174{
175    let mut output = Vec::new();
176    try_reserve_vec(&mut output, max_len.min(8192))?;
177    read_until_limited_into_impl(reader, delimiter, &mut output, max_len)?;
178    Ok(output)
179}
180
181/// Reads bytes through `delimiter` into `output` with a maximum result size.
182///
183/// # Parameters
184/// - `reader`: Buffered source reader.
185/// - `delimiter`: Delimiter byte to search for.
186/// - `output`: Destination vector to append to.
187/// - `max_len`: Maximum accepted result length.
188///
189/// # Returns
190/// Number of bytes appended to `output`.
191///
192/// # Errors
193/// Returns an invalid-data error when the limit is exceeded, or an I/O error
194/// from `reader`.
195fn read_until_limited_into_impl<T>(reader: &mut T, delimiter: u8, output: &mut Vec<u8>, max_len: usize) -> Result<usize>
196where
197    T: BufRead + ?Sized,
198{
199    let mut appended = 0;
200    loop {
201        let available = reader.fill_buf()?;
202        if available.is_empty() {
203            return Ok(appended);
204        }
205
206        let delimiter_position = available.iter().position(|byte| *byte == delimiter);
207        let requested = delimiter_position.map_or(available.len(), |position| position + 1);
208        let remaining = max_len.saturating_sub(appended);
209        if requested > remaining {
210            if remaining > 0 {
211                try_reserve_vec(output, remaining)?;
212                output.extend_from_slice(&available[..remaining]);
213                reader.consume(remaining);
214            }
215            return Err(limit_exceeded_error(max_len, delimiter));
216        }
217
218        try_reserve_vec(output, requested)?;
219        output.extend_from_slice(&available[..requested]);
220        reader.consume(requested);
221        appended += requested;
222        if delimiter_position.is_some() {
223            return Ok(appended);
224        }
225    }
226}
227
228/// Reads one UTF-8 line with a maximum byte length.
229///
230/// # Parameters
231/// - `reader`: Buffered source reader.
232/// - `max_len`: Maximum accepted line length in bytes.
233///
234/// # Returns
235/// Decoded line.
236///
237/// # Errors
238/// Returns an invalid-data error when the line exceeds the limit or is not
239/// valid UTF-8, or an I/O error from `reader`.
240fn read_line_limited_impl<T>(reader: &mut T, max_len: usize) -> Result<String>
241where
242    T: BufRead + ?Sized,
243{
244    let mut output = String::new();
245    read_line_limited_into_impl(reader, &mut output, max_len)?;
246    Ok(output)
247}
248
249/// Reads one UTF-8 line into `output` with a maximum byte length.
250///
251/// # Parameters
252/// - `reader`: Buffered source reader.
253/// - `output`: Destination string to append to.
254/// - `max_len`: Maximum accepted line length in bytes.
255///
256/// # Returns
257/// Number of bytes appended to `output`.
258///
259/// # Errors
260/// Returns an invalid-data error when the line exceeds the limit or is not
261/// valid UTF-8, or an I/O error from `reader`.
262fn read_line_limited_into_impl<T>(reader: &mut T, output: &mut String, max_len: usize) -> Result<usize>
263where
264    T: BufRead + ?Sized,
265{
266    let mut bytes = Vec::new();
267    try_reserve_vec(&mut bytes, max_len.min(8192))?;
268    let count = read_until_limited_into_impl(reader, b'\n', &mut bytes, max_len)?;
269    let line = String::from_utf8(bytes).map_err(invalid_utf8_error)?;
270    try_reserve_string(output, line.len())?;
271    output.push_str(&line);
272    Ok(count)
273}
274
275/// Discards bytes through `delimiter` with a maximum consumed size.
276///
277/// # Parameters
278/// - `reader`: Buffered source reader.
279/// - `delimiter`: Delimiter byte to search for.
280/// - `max_len`: Maximum accepted discard length.
281///
282/// # Returns
283/// Number of discarded bytes.
284///
285/// # Errors
286/// Returns an invalid-data error when the limit is exceeded, or an I/O error
287/// from `reader`.
288fn discard_until_limited_impl<T>(reader: &mut T, delimiter: u8, max_len: usize) -> Result<usize>
289where
290    T: BufRead + ?Sized,
291{
292    let mut discarded = 0;
293    loop {
294        let available = reader.fill_buf()?;
295        if available.is_empty() {
296            return Ok(discarded);
297        }
298
299        let delimiter_position = available.iter().position(|byte| *byte == delimiter);
300        let requested = delimiter_position.map_or(available.len(), |position| position + 1);
301        let remaining = max_len.saturating_sub(discarded);
302        if requested > remaining {
303            if remaining > 0 {
304                reader.consume(remaining);
305            }
306            return Err(limit_exceeded_error(max_len, delimiter));
307        }
308
309        reader.consume(requested);
310        discarded += requested;
311        if delimiter_position.is_some() {
312            return Ok(discarded);
313        }
314    }
315}
316
/// Creates the error reported when a delimiter read runs past its byte budget.
///
/// # Parameters
/// - `max_len`: Byte budget that was exceeded.
/// - `delimiter`: Delimiter byte the caller was searching for; it appears in
///   the message as its decimal value.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error whose message names both values.
fn limit_exceeded_error(max_len: usize, delimiter: u8) -> Error {
    let message = format!("input exceeds maximum length of {max_len} bytes before delimiter {delimiter}");
    Error::new(ErrorKind::InvalidData, message)
}
331
/// Wraps a failed UTF-8 conversion of a limited line in an I/O error.
///
/// # Parameters
/// - `error`: Error produced while converting the line bytes to a string.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error whose message embeds the display text
/// of `error`.
fn invalid_utf8_error(error: FromUtf8Error) -> Error {
    let description = format!("limited line is not valid UTF-8: {error}");
    Error::new(ErrorKind::InvalidData, description)
}