Skip to main content

qubit_io/ext/
buf_read_ext.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10use std::io::{
11    BufRead,
12    Error,
13    ErrorKind,
14    Result,
15};
16use std::string::FromUtf8Error;
17
/// Extension methods for [`BufRead`] values.
///
/// `BufReadExt` provides bounded delimiter-oriented reads. These helpers are
/// useful for line-based and delimiter-based formats where accepting unbounded
/// input would make parsers vulnerable to excessive memory use.
///
/// Every method takes a `max_len` budget that applies to a single call; the
/// budget counts the delimiter itself. The trait is implemented for every
/// `T: BufRead + ?Sized`, so it is available on concrete readers and on
/// `dyn BufRead` alike.
pub trait BufReadExt: BufRead {
    /// Reads bytes through `delimiter` while enforcing `max_len`.
    ///
    /// The returned vector includes the delimiter when it is found. EOF before
    /// the delimiter is accepted as long as the accumulated bytes do not exceed
    /// `max_len`. If the limit is exceeded, this method may consume the
    /// accepted prefix before reporting the error; the bytes gathered so far
    /// are dropped rather than returned.
    ///
    /// With a `max_len` of `0`, only an immediate EOF succeeds (yielding an
    /// empty vector); any available byte is reported as a limit violation.
    ///
    /// # Parameters
    /// - `delimiter`: Delimiter byte to search for.
    /// - `max_len`: Maximum accepted result length, including the delimiter.
    ///
    /// # Returns
    /// Bytes read from the stream.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when more than `max_len` bytes are
    /// required before reaching `delimiter` or EOF. Propagates I/O errors
    /// reported by the underlying reader.
    fn read_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<Vec<u8>>;

    /// Reads bytes through `delimiter` into `output` while enforcing `max_len`.
    ///
    /// This method appends at most `max_len` bytes from the current reader
    /// position to `output`. The delimiter is included when it is found. If the
    /// limit is exceeded, the accepted prefix may already have been appended to
    /// `output` and consumed from the reader.
    ///
    /// Bytes already present in `output` are kept and do not count toward
    /// `max_len`; only bytes appended by this call are measured.
    ///
    /// # Parameters
    /// - `delimiter`: Delimiter byte to search for.
    /// - `output`: Destination vector to append to.
    /// - `max_len`: Maximum accepted result length, including the delimiter.
    ///
    /// # Returns
    /// Number of bytes appended to `output`.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when more than `max_len` bytes are
    /// required before reaching `delimiter` or EOF. Propagates I/O errors
    /// reported by the underlying reader.
    fn read_until_limited_into(
        &mut self,
        delimiter: u8,
        output: &mut Vec<u8>,
        max_len: usize,
    ) -> Result<usize>;

    /// Reads one UTF-8 line while enforcing `max_len`.
    ///
    /// The returned string includes the trailing `\n` when it is present. EOF
    /// before a newline is accepted as long as the accumulated bytes do not
    /// exceed `max_len`. No line-ending normalization is performed: a `\r`
    /// preceding the newline is kept in the result.
    ///
    /// # Parameters
    /// - `max_len`: Maximum accepted line length in bytes, including `\n`.
    ///
    /// # Returns
    /// The decoded UTF-8 line.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when the line exceeds `max_len` or is
    /// not valid UTF-8. Propagates I/O errors reported by the underlying
    /// reader.
    fn read_line_limited(&mut self, max_len: usize) -> Result<String>;

    /// Reads one UTF-8 line into `output` while enforcing `max_len`.
    ///
    /// This method reads at most `max_len` bytes, validates the line as UTF-8,
    /// and appends it to `output`. If the line is oversized or invalid UTF-8,
    /// `output` is left unchanged. Oversized input may still consume the
    /// accepted prefix from the reader while detecting the limit violation.
    /// A line that fails UTF-8 validation has likewise already been consumed
    /// from the reader, because validation happens after the bytes are read.
    ///
    /// # Parameters
    /// - `output`: Destination string to append to.
    /// - `max_len`: Maximum accepted line length in bytes, including `\n`.
    ///
    /// # Returns
    /// Number of bytes appended to `output`.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when the line exceeds `max_len` or is
    /// not valid UTF-8. Propagates I/O errors reported by the underlying
    /// reader.
    fn read_line_limited_into(&mut self, output: &mut String, max_len: usize) -> Result<usize>;

    /// Discards bytes through `delimiter` while enforcing `max_len`.
    ///
    /// The delimiter is consumed when it is found. EOF before the delimiter is
    /// accepted as long as no more than `max_len` bytes are consumed. If the
    /// limit is exceeded, up to `max_len` bytes may already have been consumed
    /// from the reader before the error is reported.
    ///
    /// # Parameters
    /// - `delimiter`: Delimiter byte to search for.
    /// - `max_len`: Maximum number of bytes to discard, including the
    ///   delimiter.
    ///
    /// # Returns
    /// Number of bytes discarded.
    ///
    /// # Errors
    /// Returns [`ErrorKind::InvalidData`] when more than `max_len` bytes are
    /// required before reaching `delimiter` or EOF. Propagates I/O errors
    /// reported by the underlying reader.
    fn discard_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<usize>;
}
127
128impl<T> BufReadExt for T
129where
130    T: BufRead + ?Sized,
131{
132    #[inline]
133    fn read_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<Vec<u8>> {
134        read_until_limited_impl(self, delimiter, max_len)
135    }
136
137    #[inline]
138    fn read_until_limited_into(
139        &mut self,
140        delimiter: u8,
141        output: &mut Vec<u8>,
142        max_len: usize,
143    ) -> Result<usize> {
144        read_until_limited_into_impl(self, delimiter, output, max_len)
145    }
146
147    #[inline]
148    fn read_line_limited(&mut self, max_len: usize) -> Result<String> {
149        read_line_limited_impl(self, max_len)
150    }
151
152    #[inline]
153    fn read_line_limited_into(&mut self, output: &mut String, max_len: usize) -> Result<usize> {
154        read_line_limited_into_impl(self, output, max_len)
155    }
156
157    #[inline]
158    fn discard_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<usize> {
159        discard_until_limited_impl(self, delimiter, max_len)
160    }
161}
162
163/// Reads bytes through `delimiter` with a maximum result size.
164///
165/// # Parameters
166/// - `reader`: Buffered source reader.
167/// - `delimiter`: Delimiter byte to search for.
168/// - `max_len`: Maximum accepted result length.
169///
170/// # Returns
171/// Bytes read from the stream.
172///
173/// # Errors
174/// Returns an invalid-data error when the limit is exceeded, or an I/O error
175/// from `reader`.
176fn read_until_limited_impl<T>(reader: &mut T, delimiter: u8, max_len: usize) -> Result<Vec<u8>>
177where
178    T: BufRead + ?Sized,
179{
180    let mut output = Vec::with_capacity(max_len.min(8192));
181    read_until_limited_into_impl(reader, delimiter, &mut output, max_len)?;
182    Ok(output)
183}
184
185/// Reads bytes through `delimiter` into `output` with a maximum result size.
186///
187/// # Parameters
188/// - `reader`: Buffered source reader.
189/// - `delimiter`: Delimiter byte to search for.
190/// - `output`: Destination vector to append to.
191/// - `max_len`: Maximum accepted result length.
192///
193/// # Returns
194/// Number of bytes appended to `output`.
195///
196/// # Errors
197/// Returns an invalid-data error when the limit is exceeded, or an I/O error
198/// from `reader`.
199fn read_until_limited_into_impl<T>(
200    reader: &mut T,
201    delimiter: u8,
202    output: &mut Vec<u8>,
203    max_len: usize,
204) -> Result<usize>
205where
206    T: BufRead + ?Sized,
207{
208    let mut appended = 0;
209    loop {
210        let available = reader.fill_buf()?;
211        if available.is_empty() {
212            return Ok(appended);
213        }
214
215        let delimiter_position = available.iter().position(|byte| *byte == delimiter);
216        let requested = delimiter_position.map_or(available.len(), |position| position + 1);
217        let remaining = max_len.saturating_sub(appended);
218        if requested > remaining {
219            if remaining > 0 {
220                output.extend_from_slice(&available[..remaining]);
221                reader.consume(remaining);
222            }
223            return Err(limit_exceeded_error(max_len, delimiter));
224        }
225
226        output.extend_from_slice(&available[..requested]);
227        reader.consume(requested);
228        appended += requested;
229        if delimiter_position.is_some() {
230            return Ok(appended);
231        }
232    }
233}
234
235/// Reads one UTF-8 line with a maximum byte length.
236///
237/// # Parameters
238/// - `reader`: Buffered source reader.
239/// - `max_len`: Maximum accepted line length in bytes.
240///
241/// # Returns
242/// Decoded line.
243///
244/// # Errors
245/// Returns an invalid-data error when the line exceeds the limit or is not
246/// valid UTF-8, or an I/O error from `reader`.
247fn read_line_limited_impl<T>(reader: &mut T, max_len: usize) -> Result<String>
248where
249    T: BufRead + ?Sized,
250{
251    let mut output = String::new();
252    read_line_limited_into_impl(reader, &mut output, max_len)?;
253    Ok(output)
254}
255
256/// Reads one UTF-8 line into `output` with a maximum byte length.
257///
258/// # Parameters
259/// - `reader`: Buffered source reader.
260/// - `output`: Destination string to append to.
261/// - `max_len`: Maximum accepted line length in bytes.
262///
263/// # Returns
264/// Number of bytes appended to `output`.
265///
266/// # Errors
267/// Returns an invalid-data error when the line exceeds the limit or is not
268/// valid UTF-8, or an I/O error from `reader`.
269fn read_line_limited_into_impl<T>(
270    reader: &mut T,
271    output: &mut String,
272    max_len: usize,
273) -> Result<usize>
274where
275    T: BufRead + ?Sized,
276{
277    let mut bytes = Vec::with_capacity(max_len.min(8192));
278    let count = read_until_limited_into_impl(reader, b'\n', &mut bytes, max_len)?;
279    let line = String::from_utf8(bytes).map_err(invalid_utf8_error)?;
280    output.push_str(&line);
281    Ok(count)
282}
283
284/// Discards bytes through `delimiter` with a maximum consumed size.
285///
286/// # Parameters
287/// - `reader`: Buffered source reader.
288/// - `delimiter`: Delimiter byte to search for.
289/// - `max_len`: Maximum accepted discard length.
290///
291/// # Returns
292/// Number of discarded bytes.
293///
294/// # Errors
295/// Returns an invalid-data error when the limit is exceeded, or an I/O error
296/// from `reader`.
297fn discard_until_limited_impl<T>(reader: &mut T, delimiter: u8, max_len: usize) -> Result<usize>
298where
299    T: BufRead + ?Sized,
300{
301    let mut discarded = 0;
302    loop {
303        let available = reader.fill_buf()?;
304        if available.is_empty() {
305            return Ok(discarded);
306        }
307
308        let delimiter_position = available.iter().position(|byte| *byte == delimiter);
309        let requested = delimiter_position.map_or(available.len(), |position| position + 1);
310        let remaining = max_len.saturating_sub(discarded);
311        if requested > remaining {
312            if remaining > 0 {
313                reader.consume(remaining);
314            }
315            return Err(limit_exceeded_error(max_len, delimiter));
316        }
317
318        reader.consume(requested);
319        discarded += requested;
320        if delimiter_position.is_some() {
321            return Ok(discarded);
322        }
323    }
324}
325
/// Creates the error reported when a delimiter read runs past its byte budget.
///
/// The message names the budget and the delimiter; the delimiter is rendered
/// as its numeric byte value (for example `10` for `\n`).
///
/// # Parameters
/// - `max_len`: Maximum accepted byte length.
/// - `delimiter`: Delimiter byte searched by the caller.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error.
fn limit_exceeded_error(max_len: usize, delimiter: u8) -> Error {
    let message =
        format!("input exceeds maximum length of {max_len} bytes before delimiter {delimiter}");
    Error::new(ErrorKind::InvalidData, message)
}
340
/// Wraps a UTF-8 decoding failure of a limited line in an I/O error.
///
/// # Parameters
/// - `error`: UTF-8 conversion error.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error whose message embeds the display text
/// of the UTF-8 error.
fn invalid_utf8_error(error: FromUtf8Error) -> Error {
    let message = format!("limited line is not valid UTF-8: {error}");
    Error::new(ErrorKind::InvalidData, message)
}