Skip to main content

qubit_io/ext/
buf_read_ext.rs

1// =============================================================================
2//    Copyright (c) 2026 Haixing Hu.
3//
4//    SPDX-License-Identifier: Apache-2.0
5//
6//    Licensed under the Apache License, Version 2.0.
7// =============================================================================
8use std::io::{
9    BufRead,
10    Error,
11    ErrorKind,
12    Result,
13};
14use std::string::FromUtf8Error;
15
16use crate::util::{
17    try_reserve_string,
18    try_reserve_vec,
19};
20
/// Bounded, delimiter-oriented reading helpers for any [`BufRead`] source.
///
/// Every method takes a `max_len` byte budget. Parsers for line-based or
/// delimiter-based formats can use these helpers to avoid buffering an
/// arbitrarily long record sent by an untrusted peer.
pub trait BufReadExt: BufRead {
    /// Reads up to and including the next `delimiter` into a new vector,
    /// accepting at most `max_len` bytes.
    ///
    /// A stream that ends before the delimiter is not an error: the bytes
    /// read so far (possibly none) are returned as long as they fit within
    /// `max_len`. When the limit is hit first, the bytes that fit within the
    /// limit may already have been consumed from the reader.
    ///
    /// # Parameters
    /// - `delimiter`: Byte that terminates the read.
    /// - `max_len`: Largest accepted result size in bytes; the delimiter
    ///   counts toward it.
    ///
    /// # Returns
    /// The bytes read, ending with `delimiter` when it was found.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when reaching `delimiter` or EOF
    /// would take more than `max_len` bytes. Propagates I/O errors reported
    /// by the underlying reader.
    fn read_until_limited(
        &mut self,
        delimiter: u8,
        max_len: usize,
    ) -> Result<Vec<u8>>;

    /// Appends bytes up to and including the next `delimiter` to `output`,
    /// accepting at most `max_len` new bytes.
    ///
    /// Existing contents of `output` are kept and do not count toward
    /// `max_len`. When the limit is hit first, the bytes that fit within the
    /// limit may already have been appended to `output` and consumed from the
    /// reader before the error is returned.
    ///
    /// # Parameters
    /// - `delimiter`: Byte that terminates the read.
    /// - `output`: Vector that receives the bytes.
    /// - `max_len`: Largest accepted number of appended bytes; the delimiter
    ///   counts toward it.
    ///
    /// # Returns
    /// How many bytes were appended to `output`.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when reaching `delimiter` or EOF
    /// would take more than `max_len` bytes. Propagates I/O errors reported
    /// by the underlying reader.
    fn read_until_limited_into(
        &mut self,
        delimiter: u8,
        output: &mut Vec<u8>,
        max_len: usize,
    ) -> Result<usize>;

    /// Reads a single UTF-8 line of at most `max_len` bytes.
    ///
    /// The terminating `\n` is kept in the returned string when the stream
    /// contains one. A final line without `\n` is returned as-is provided it
    /// fits within `max_len`.
    ///
    /// # Parameters
    /// - `max_len`: Largest accepted line size in bytes; `\n` counts toward
    ///   it.
    ///
    /// # Returns
    /// The line decoded as UTF-8.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when the line is longer than
    /// `max_len` or is not valid UTF-8. Propagates I/O errors reported by the
    /// underlying reader.
    fn read_line_limited(&mut self, max_len: usize) -> Result<String>;

    /// Appends a single UTF-8 line of at most `max_len` bytes to `output`.
    ///
    /// The line is validated before anything is appended, so `output` keeps
    /// its previous contents when the line is too long or not valid UTF-8.
    /// The reader itself may still have advanced past the bytes that fit
    /// within the limit by the time an oversized line is detected.
    ///
    /// # Parameters
    /// - `output`: String that receives the line.
    /// - `max_len`: Largest accepted line size in bytes; `\n` counts toward
    ///   it.
    ///
    /// # Returns
    /// How many bytes were appended to `output`.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when the line is longer than
    /// `max_len` or is not valid UTF-8. Propagates I/O errors reported by the
    /// underlying reader.
    fn read_line_limited_into(
        &mut self,
        output: &mut String,
        max_len: usize,
    ) -> Result<usize>;

    /// Skips bytes up to and including the next `delimiter`, consuming at
    /// most `max_len` bytes.
    ///
    /// The delimiter itself is consumed when it is found. A stream that ends
    /// before the delimiter is not an error as long as no more than `max_len`
    /// bytes had to be skipped.
    ///
    /// # Parameters
    /// - `delimiter`: Byte that terminates the skip.
    /// - `max_len`: Largest accepted number of skipped bytes; the delimiter
    ///   counts toward it.
    ///
    /// # Returns
    /// How many bytes were skipped.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when reaching `delimiter` or EOF
    /// would take more than `max_len` bytes. Propagates I/O errors reported
    /// by the underlying reader.
    fn discard_until_limited(
        &mut self,
        delimiter: u8,
        max_len: usize,
    ) -> Result<usize>;
}
142
143impl<T> BufReadExt for T
144where
145    T: BufRead + ?Sized,
146{
147    #[inline]
148    fn read_until_limited(
149        &mut self,
150        delimiter: u8,
151        max_len: usize,
152    ) -> Result<Vec<u8>> {
153        read_until_limited_impl(self, delimiter, max_len)
154    }
155
156    #[inline]
157    fn read_until_limited_into(
158        &mut self,
159        delimiter: u8,
160        output: &mut Vec<u8>,
161        max_len: usize,
162    ) -> Result<usize> {
163        read_until_limited_into_impl(self, delimiter, output, max_len)
164    }
165
166    #[inline]
167    fn read_line_limited(&mut self, max_len: usize) -> Result<String> {
168        read_line_limited_impl(self, max_len)
169    }
170
171    #[inline]
172    fn read_line_limited_into(
173        &mut self,
174        output: &mut String,
175        max_len: usize,
176    ) -> Result<usize> {
177        read_line_limited_into_impl(self, output, max_len)
178    }
179
180    #[inline]
181    fn discard_until_limited(
182        &mut self,
183        delimiter: u8,
184        max_len: usize,
185    ) -> Result<usize> {
186        discard_until_limited_impl(self, delimiter, max_len)
187    }
188}
189
190/// Reads bytes through `delimiter` with a maximum result size.
191///
192/// # Parameters
193/// - `reader`: Buffered source reader.
194/// - `delimiter`: Delimiter byte to search for.
195/// - `max_len`: Maximum accepted result length.
196///
197/// # Returns
198/// Bytes read from the stream.
199///
200/// # Errors
201/// Returns an invalid-data error when the limit is exceeded, or an I/O error
202/// from `reader`.
203fn read_until_limited_impl<T>(
204    reader: &mut T,
205    delimiter: u8,
206    max_len: usize,
207) -> Result<Vec<u8>>
208where
209    T: BufRead + ?Sized,
210{
211    let mut output = Vec::new();
212    try_reserve_vec(&mut output, max_len.min(8192))?;
213    read_until_limited_into_impl(reader, delimiter, &mut output, max_len)?;
214    Ok(output)
215}
216
217/// Reads bytes through `delimiter` into `output` with a maximum result size.
218///
219/// # Parameters
220/// - `reader`: Buffered source reader.
221/// - `delimiter`: Delimiter byte to search for.
222/// - `output`: Destination vector to append to.
223/// - `max_len`: Maximum accepted result length.
224///
225/// # Returns
226/// Number of bytes appended to `output`.
227///
228/// # Errors
229/// Returns an invalid-data error when the limit is exceeded, or an I/O error
230/// from `reader`.
231fn read_until_limited_into_impl<T>(
232    reader: &mut T,
233    delimiter: u8,
234    output: &mut Vec<u8>,
235    max_len: usize,
236) -> Result<usize>
237where
238    T: BufRead + ?Sized,
239{
240    let mut appended = 0;
241    loop {
242        let available = reader.fill_buf()?;
243        if available.is_empty() {
244            return Ok(appended);
245        }
246
247        let delimiter_position =
248            available.iter().position(|byte| *byte == delimiter);
249        let requested =
250            delimiter_position.map_or(available.len(), |position| position + 1);
251        let remaining = max_len.saturating_sub(appended);
252        if requested > remaining {
253            if remaining > 0 {
254                try_reserve_vec(output, remaining)?;
255                output.extend_from_slice(&available[..remaining]);
256                reader.consume(remaining);
257            }
258            return Err(limit_exceeded_error(max_len, delimiter));
259        }
260
261        try_reserve_vec(output, requested)?;
262        output.extend_from_slice(&available[..requested]);
263        reader.consume(requested);
264        appended += requested;
265        if delimiter_position.is_some() {
266            return Ok(appended);
267        }
268    }
269}
270
271/// Reads one UTF-8 line with a maximum byte length.
272///
273/// # Parameters
274/// - `reader`: Buffered source reader.
275/// - `max_len`: Maximum accepted line length in bytes.
276///
277/// # Returns
278/// Decoded line.
279///
280/// # Errors
281/// Returns an invalid-data error when the line exceeds the limit or is not
282/// valid UTF-8, or an I/O error from `reader`.
283fn read_line_limited_impl<T>(reader: &mut T, max_len: usize) -> Result<String>
284where
285    T: BufRead + ?Sized,
286{
287    let mut output = String::new();
288    read_line_limited_into_impl(reader, &mut output, max_len)?;
289    Ok(output)
290}
291
292/// Reads one UTF-8 line into `output` with a maximum byte length.
293///
294/// # Parameters
295/// - `reader`: Buffered source reader.
296/// - `output`: Destination string to append to.
297/// - `max_len`: Maximum accepted line length in bytes.
298///
299/// # Returns
300/// Number of bytes appended to `output`.
301///
302/// # Errors
303/// Returns an invalid-data error when the line exceeds the limit or is not
304/// valid UTF-8, or an I/O error from `reader`.
305fn read_line_limited_into_impl<T>(
306    reader: &mut T,
307    output: &mut String,
308    max_len: usize,
309) -> Result<usize>
310where
311    T: BufRead + ?Sized,
312{
313    let mut bytes = Vec::new();
314    try_reserve_vec(&mut bytes, max_len.min(8192))?;
315    let count =
316        read_until_limited_into_impl(reader, b'\n', &mut bytes, max_len)?;
317    let line = String::from_utf8(bytes).map_err(invalid_utf8_error)?;
318    try_reserve_string(output, line.len())?;
319    output.push_str(&line);
320    Ok(count)
321}
322
323/// Discards bytes through `delimiter` with a maximum consumed size.
324///
325/// # Parameters
326/// - `reader`: Buffered source reader.
327/// - `delimiter`: Delimiter byte to search for.
328/// - `max_len`: Maximum accepted discard length.
329///
330/// # Returns
331/// Number of discarded bytes.
332///
333/// # Errors
334/// Returns an invalid-data error when the limit is exceeded, or an I/O error
335/// from `reader`.
336fn discard_until_limited_impl<T>(
337    reader: &mut T,
338    delimiter: u8,
339    max_len: usize,
340) -> Result<usize>
341where
342    T: BufRead + ?Sized,
343{
344    let mut discarded = 0;
345    loop {
346        let available = reader.fill_buf()?;
347        if available.is_empty() {
348            return Ok(discarded);
349        }
350
351        let delimiter_position =
352            available.iter().position(|byte| *byte == delimiter);
353        let requested =
354            delimiter_position.map_or(available.len(), |position| position + 1);
355        let remaining = max_len.saturating_sub(discarded);
356        if requested > remaining {
357            if remaining > 0 {
358                reader.consume(remaining);
359            }
360            return Err(limit_exceeded_error(max_len, delimiter));
361        }
362
363        reader.consume(requested);
364        discarded += requested;
365        if delimiter_position.is_some() {
366            return Ok(discarded);
367        }
368    }
369}
370
/// Creates the error reported when a delimiter read outgrows its byte limit.
///
/// # Parameters
/// - `max_len`: Byte limit that was exceeded; interpolated into the message.
/// - `delimiter`: Delimiter byte that was being searched for; interpolated
///   into the message as a decimal number.
///
/// # Returns
/// An [`Error`] of kind [`ErrorKind::InvalidData`].
fn limit_exceeded_error(max_len: usize, delimiter: u8) -> Error {
    let message = format!(
        "input exceeds maximum length of {max_len} bytes before delimiter {delimiter}"
    );
    Error::new(ErrorKind::InvalidData, message)
}
387
/// Wraps a failed UTF-8 conversion of a limited line in an I/O error.
///
/// # Parameters
/// - `error`: Error produced by the failed byte-to-string conversion; its
///   display text is embedded in the resulting message.
///
/// # Returns
/// An [`Error`] of kind [`ErrorKind::InvalidData`].
fn invalid_utf8_error(error: FromUtf8Error) -> Error {
    let message = format!("limited line is not valid UTF-8: {error}");
    Error::new(ErrorKind::InvalidData, message)
}