qubit_io/ext/buf_read_ext.rs
1// =============================================================================
2// Copyright (c) 2026 Haixing Hu.
3//
4// SPDX-License-Identifier: Apache-2.0
5//
6// Licensed under the Apache License, Version 2.0.
7// =============================================================================
8use std::io::{
9 BufRead,
10 Error,
11 ErrorKind,
12 Result,
13};
14use std::string::FromUtf8Error;
15
16use crate::util::{
17 try_reserve_string,
18 try_reserve_vec,
19};
20
/// Bounded, delimiter-oriented reading helpers for any [`BufRead`] source.
///
/// Every method takes an explicit `max_len`, so line-based and
/// delimiter-based parsers never have to accumulate an unbounded amount of
/// input while waiting for a delimiter that may never arrive.
pub trait BufReadExt: BufRead {
    /// Collects bytes up to and including `delimiter`, capped at `max_len`.
    ///
    /// When the delimiter is found it is the last byte of the returned
    /// vector. Reaching EOF first is not an error, provided no more than
    /// `max_len` bytes were collected. On a limit violation the bytes that
    /// still fit within the limit may already have been consumed from the
    /// reader.
    ///
    /// # Parameters
    /// - `delimiter`: Byte that terminates the read.
    /// - `max_len`: Largest accepted result size in bytes; the delimiter
    ///   counts towards it.
    ///
    /// # Returns
    /// The bytes taken from the stream.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when neither `delimiter` nor EOF
    /// is reached within `max_len` bytes. I/O errors reported by the
    /// underlying reader are propagated.
    fn read_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<Vec<u8>>;

    /// Appends bytes up to and including `delimiter` to `output`, capped at
    /// `max_len`.
    ///
    /// No more than `max_len` bytes, counted from the current reader
    /// position, are appended. The delimiter is appended when it is found.
    /// On a limit violation the prefix that still fit may already be present
    /// in `output` and gone from the reader.
    ///
    /// # Parameters
    /// - `delimiter`: Byte that terminates the read.
    /// - `output`: Vector that receives the bytes; existing content is kept.
    /// - `max_len`: Largest accepted number of appended bytes; the delimiter
    ///   counts towards it.
    ///
    /// # Returns
    /// How many bytes were appended to `output`.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when neither `delimiter` nor EOF
    /// is reached within `max_len` bytes. I/O errors reported by the
    /// underlying reader are propagated.
    fn read_until_limited_into(
        &mut self,
        delimiter: u8,
        output: &mut Vec<u8>,
        max_len: usize,
    ) -> Result<usize>;

    /// Reads a single UTF-8 line of at most `max_len` bytes.
    ///
    /// A trailing `\n` is kept in the returned string when the input has
    /// one. A final line that ends at EOF without a newline is accepted as
    /// long as it fits within `max_len`.
    ///
    /// # Parameters
    /// - `max_len`: Largest accepted line size in bytes; the `\n` counts
    ///   towards it.
    ///
    /// # Returns
    /// The line decoded as UTF-8.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when the line is longer than
    /// `max_len` or is not valid UTF-8. I/O errors reported by the underlying
    /// reader are propagated.
    fn read_line_limited(&mut self, max_len: usize) -> Result<String>;

    /// Reads a single UTF-8 line of at most `max_len` bytes and appends it to
    /// `output`.
    ///
    /// The line is validated as UTF-8 before anything is appended, so an
    /// oversized or malformed line leaves `output` exactly as it was. The
    /// reader may nevertheless have advanced past the bytes that were
    /// inspected while detecting the violation.
    ///
    /// # Parameters
    /// - `output`: String that receives the line; existing content is kept.
    /// - `max_len`: Largest accepted line size in bytes; the `\n` counts
    ///   towards it.
    ///
    /// # Returns
    /// How many bytes were appended to `output`.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when the line is longer than
    /// `max_len` or is not valid UTF-8. I/O errors reported by the underlying
    /// reader are propagated.
    fn read_line_limited_into(&mut self, output: &mut String, max_len: usize) -> Result<usize>;

    /// Skips bytes up to and including `delimiter`, capped at `max_len`.
    ///
    /// The delimiter itself is consumed when it is found. Reaching EOF first
    /// is not an error, provided no more than `max_len` bytes were skipped.
    ///
    /// # Parameters
    /// - `delimiter`: Byte that terminates the skip.
    /// - `max_len`: Largest accepted number of skipped bytes; the delimiter
    ///   counts towards it.
    ///
    /// # Returns
    /// How many bytes were skipped.
    ///
    /// # Errors
    /// Fails with [`ErrorKind::InvalidData`] when neither `delimiter` nor EOF
    /// is reached within `max_len` bytes. I/O errors reported by the
    /// underlying reader are propagated.
    fn discard_until_limited(&mut self, delimiter: u8, max_len: usize) -> Result<usize>;
}
142
143impl<T> BufReadExt for T
144where
145 T: BufRead + ?Sized,
146{
147 #[inline]
148 fn read_until_limited(
149 &mut self,
150 delimiter: u8,
151 max_len: usize,
152 ) -> Result<Vec<u8>> {
153 read_until_limited_impl(self, delimiter, max_len)
154 }
155
156 #[inline]
157 fn read_until_limited_into(
158 &mut self,
159 delimiter: u8,
160 output: &mut Vec<u8>,
161 max_len: usize,
162 ) -> Result<usize> {
163 read_until_limited_into_impl(self, delimiter, output, max_len)
164 }
165
166 #[inline]
167 fn read_line_limited(&mut self, max_len: usize) -> Result<String> {
168 read_line_limited_impl(self, max_len)
169 }
170
171 #[inline]
172 fn read_line_limited_into(
173 &mut self,
174 output: &mut String,
175 max_len: usize,
176 ) -> Result<usize> {
177 read_line_limited_into_impl(self, output, max_len)
178 }
179
180 #[inline]
181 fn discard_until_limited(
182 &mut self,
183 delimiter: u8,
184 max_len: usize,
185 ) -> Result<usize> {
186 discard_until_limited_impl(self, delimiter, max_len)
187 }
188}
189
190/// Reads bytes through `delimiter` with a maximum result size.
191///
192/// # Parameters
193/// - `reader`: Buffered source reader.
194/// - `delimiter`: Delimiter byte to search for.
195/// - `max_len`: Maximum accepted result length.
196///
197/// # Returns
198/// Bytes read from the stream.
199///
200/// # Errors
201/// Returns an invalid-data error when the limit is exceeded, or an I/O error
202/// from `reader`.
203fn read_until_limited_impl<T>(
204 reader: &mut T,
205 delimiter: u8,
206 max_len: usize,
207) -> Result<Vec<u8>>
208where
209 T: BufRead + ?Sized,
210{
211 let mut output = Vec::new();
212 try_reserve_vec(&mut output, max_len.min(8192))?;
213 read_until_limited_into_impl(reader, delimiter, &mut output, max_len)?;
214 Ok(output)
215}
216
217/// Reads bytes through `delimiter` into `output` with a maximum result size.
218///
219/// # Parameters
220/// - `reader`: Buffered source reader.
221/// - `delimiter`: Delimiter byte to search for.
222/// - `output`: Destination vector to append to.
223/// - `max_len`: Maximum accepted result length.
224///
225/// # Returns
226/// Number of bytes appended to `output`.
227///
228/// # Errors
229/// Returns an invalid-data error when the limit is exceeded, or an I/O error
230/// from `reader`.
231fn read_until_limited_into_impl<T>(
232 reader: &mut T,
233 delimiter: u8,
234 output: &mut Vec<u8>,
235 max_len: usize,
236) -> Result<usize>
237where
238 T: BufRead + ?Sized,
239{
240 let mut appended = 0;
241 loop {
242 let available = reader.fill_buf()?;
243 if available.is_empty() {
244 return Ok(appended);
245 }
246
247 let delimiter_position =
248 available.iter().position(|byte| *byte == delimiter);
249 let requested =
250 delimiter_position.map_or(available.len(), |position| position + 1);
251 let remaining = max_len.saturating_sub(appended);
252 if requested > remaining {
253 if remaining > 0 {
254 try_reserve_vec(output, remaining)?;
255 output.extend_from_slice(&available[..remaining]);
256 reader.consume(remaining);
257 }
258 return Err(limit_exceeded_error(max_len, delimiter));
259 }
260
261 try_reserve_vec(output, requested)?;
262 output.extend_from_slice(&available[..requested]);
263 reader.consume(requested);
264 appended += requested;
265 if delimiter_position.is_some() {
266 return Ok(appended);
267 }
268 }
269}
270
271/// Reads one UTF-8 line with a maximum byte length.
272///
273/// # Parameters
274/// - `reader`: Buffered source reader.
275/// - `max_len`: Maximum accepted line length in bytes.
276///
277/// # Returns
278/// Decoded line.
279///
280/// # Errors
281/// Returns an invalid-data error when the line exceeds the limit or is not
282/// valid UTF-8, or an I/O error from `reader`.
283fn read_line_limited_impl<T>(reader: &mut T, max_len: usize) -> Result<String>
284where
285 T: BufRead + ?Sized,
286{
287 let mut output = String::new();
288 read_line_limited_into_impl(reader, &mut output, max_len)?;
289 Ok(output)
290}
291
292/// Reads one UTF-8 line into `output` with a maximum byte length.
293///
294/// # Parameters
295/// - `reader`: Buffered source reader.
296/// - `output`: Destination string to append to.
297/// - `max_len`: Maximum accepted line length in bytes.
298///
299/// # Returns
300/// Number of bytes appended to `output`.
301///
302/// # Errors
303/// Returns an invalid-data error when the line exceeds the limit or is not
304/// valid UTF-8, or an I/O error from `reader`.
305fn read_line_limited_into_impl<T>(
306 reader: &mut T,
307 output: &mut String,
308 max_len: usize,
309) -> Result<usize>
310where
311 T: BufRead + ?Sized,
312{
313 let mut bytes = Vec::new();
314 try_reserve_vec(&mut bytes, max_len.min(8192))?;
315 let count =
316 read_until_limited_into_impl(reader, b'\n', &mut bytes, max_len)?;
317 let line = String::from_utf8(bytes).map_err(invalid_utf8_error)?;
318 try_reserve_string(output, line.len())?;
319 output.push_str(&line);
320 Ok(count)
321}
322
323/// Discards bytes through `delimiter` with a maximum consumed size.
324///
325/// # Parameters
326/// - `reader`: Buffered source reader.
327/// - `delimiter`: Delimiter byte to search for.
328/// - `max_len`: Maximum accepted discard length.
329///
330/// # Returns
331/// Number of discarded bytes.
332///
333/// # Errors
334/// Returns an invalid-data error when the limit is exceeded, or an I/O error
335/// from `reader`.
336fn discard_until_limited_impl<T>(
337 reader: &mut T,
338 delimiter: u8,
339 max_len: usize,
340) -> Result<usize>
341where
342 T: BufRead + ?Sized,
343{
344 let mut discarded = 0;
345 loop {
346 let available = reader.fill_buf()?;
347 if available.is_empty() {
348 return Ok(discarded);
349 }
350
351 let delimiter_position =
352 available.iter().position(|byte| *byte == delimiter);
353 let requested =
354 delimiter_position.map_or(available.len(), |position| position + 1);
355 let remaining = max_len.saturating_sub(discarded);
356 if requested > remaining {
357 if remaining > 0 {
358 reader.consume(remaining);
359 }
360 return Err(limit_exceeded_error(max_len, delimiter));
361 }
362
363 reader.consume(requested);
364 discarded += requested;
365 if delimiter_position.is_some() {
366 return Ok(discarded);
367 }
368 }
369}
370
/// Creates the error reported when a delimiter read runs past its limit.
///
/// The message names the limit and the delimiter; the delimiter is rendered
/// as its numeric byte value.
///
/// # Parameters
/// - `max_len`: Byte limit that was exceeded.
/// - `delimiter`: Delimiter byte the caller was searching for.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error carrying the formatted message.
fn limit_exceeded_error(max_len: usize, delimiter: u8) -> Error {
    let message =
        format!("input exceeds maximum length of {max_len} bytes before delimiter {delimiter}");
    Error::new(ErrorKind::InvalidData, message)
}
387
/// Wraps a failed UTF-8 conversion of a limited line in an I/O error.
///
/// # Parameters
/// - `error`: Error produced by the failed byte-to-string conversion.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error whose message embeds the display text
/// of `error`.
fn invalid_utf8_error(error: FromUtf8Error) -> Error {
    let message = format!("limited line is not valid UTF-8: {error}");
    Error::new(ErrorKind::InvalidData, message)
}
400}