Skip to main content

bytesbuf_io/
read.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4use std::fmt::Debug;
5
6use bytesbuf::BytesBuf;
7use bytesbuf::mem::{HasMemory, Memory};
8
9/// Allows for reading of bytes.
10///
11/// Only supports asynchronous access, exposing the read byte sequences as `bytesbuf::BytesBuf`.
12///
13/// # Ownership
14///
15/// The methods on this trait accept `&mut self` and take an exclusive reference to the source for
16/// the duration of the operation. This implies that at most one read operation can be in progress
17/// on the object at any time.
18///
19/// # Memory management for efficient I/O
20///
21/// For optimal efficiency when performing I/O, reads should be performed into memory optimized
22/// for the underlying I/O endpoint. This is achieved by reserving memory from the implementation's
23/// memory provider before performing the read operation.
24///
25/// There are three ways to ensure you are using memory suitable for optimally efficient I/O:
26///
27/// 1. If you call methods that do not accept a `BytesBuf` (such as `read_any()`), the
28///    implementation will reserve memory from its memory provider internally. This is the simplest way
29///    to perform reads but only a limited API surface is available in this mode.
30/// 2. You may call [`Memory::reserve()`][2] on the implementation to reserve memory from its memory
31///    provider explicitly. This allows you to control the memory allocation more finely and
32///    potentially reuse existing buffers, improving efficiency.
33/// 3. You may need to call `reserve()` at a point where Rust borrowing rules do not allow you to
34///    call it directly on the implementation because it has already been borrowed.
35///    In this case, you can obtain an independent reference to the memory provider first via
36///    [`HasMemory::memory()`][1], which allows you to bypass the need to borrow the implementing object itself.
37///
38/// Some implementations do not perform real I/O and only move data around in memory. Such
39/// implementations typically do not have any special memory requirements and will operate
40/// with the same efficiency regardless of which buffers the data is in. Any relaxed behaviors
41/// like this will typically be described in the implementation's API documentation.
42///
43/// # Thread safety
44///
45/// This trait requires `Send` from both the implementation and any returned futures.
46///
47/// [1]: bytesbuf::mem::HasMemory::memory
48/// [2]: bytesbuf::mem::Memory::reserve
49#[trait_variant::make(Send)]
50pub trait Read: HasMemory + Memory + Debug {
51    /// Type used to signal errors by the implementation of this trait.
52    type Error: std::error::Error + Send + Sync + 'static;
53
54    /// Reads at most `len` bytes into the provided buffer.
55    ///
56    /// It is not necessary for `into` to be empty - the buffer may already have some
57    /// bytes of data in it (e.g. from a previous read).
58    ///
59    /// The buffer will be extended with additional memory capacity
60    /// if it does not have enough remaining capacity to fit `len` additional bytes.
61    ///
62    /// Returns a tuple of the number of bytes read and the updated buffer.
63    ///
64    /// The returned [`BytesBuf`] will have 0 or more bytes of data appended to it on success,
65    /// with 0 appended bytes indicating that no more bytes can be read from this source. Any
66    /// data that was already in the buffer will remain untouched.
67    ///
68    /// # Example
69    ///
70    /// ```
71    /// # testing_aids::execute_or_terminate_process(|| futures::executor::block_on(async {
72    /// # use bytesbuf_io::testing::Null;
73    /// use bytesbuf::mem::Memory;
74    /// use bytesbuf_io::Read;
75    ///
76    /// # fn get_source() -> Null { Null::new() }
77    /// let mut source = get_source();
78    ///
79    /// let buf = source.reserve(100);
80    ///
81    /// let (bytes_read, mut buf) = source.read_at_most_into(10, buf).await.unwrap();
82    ///
83    /// assert!(bytes_read <= 10);
84    ///
85    /// let bytes = buf.consume_all();
86    /// # }));
87    /// ```
88    async fn read_at_most_into(&mut self, len: usize, into: BytesBuf) -> Result<(usize, BytesBuf), Self::Error>;
89
90    /// Reads an unspecified number of bytes into the provided buffer.
91    ///
92    /// The implementation will decide how many bytes to read based on its internal understanding of
93    /// what is optimal for sustained throughput at high efficiency. This may be a fixed size,
94    /// or it may be a variable size based on the current state of the source.
95    ///
96    /// It is not necessary for `into` to be empty - the buffer may already have some
97    /// bytes of data in it (e.g. from a previous read).
98    ///
99    /// The buffer will be extended with additional memory capacity if it does not have
100    /// enough remaining capacity to fit the number of bytes the implementation decides to read.
101    ///
102    /// Returns a tuple of the number of bytes read and the updated buffer.
103    ///
104    /// The returned [`BytesBuf`] will have 0 or more bytes of data appended to it on success,
105    /// with 0 appended bytes indicating that no more bytes can be read from this source. Any
106    /// data that was already in the buffer will remain untouched.
107    ///
108    /// # Example
109    ///
110    /// ```
111    /// # testing_aids::execute_or_terminate_process(|| futures::executor::block_on(async {
112    /// # use bytesbuf_io::testing::Null;
113    /// use bytesbuf_io::Read;
114    ///
115    /// # fn get_source() -> Null { Null::new() }
116    /// let mut source = get_source();
117    ///
118    /// let mut buf = source.read_any().await.unwrap();
119    ///
120    /// if buf.len() == 0 {
121    ///     println!("Source ended immediately - nothing we can do.");
122    ///     return;
123    /// }
124    ///
125    /// loop {
126    ///     // We want at least 1 MB of data, read in whatever sized pieces the I/O endpoint
127    ///     // considers optimal.
128    ///     if buf.len() >= 1024 * 1024 {
129    ///         println!("Got 1 MB of data");
130    ///         break;
131    ///     }
132    ///
133    ///     let (bytes_read, new_buf) = source.read_more_into(buf).await.unwrap();
134    ///     buf = new_buf;
135    ///
136    ///     if bytes_read == 0 {
137    ///         println!("Source ended - no more data before we reached 1 MB");
138    ///         break;
139    ///     }
140    /// }
141    /// # }));
142    /// ```
143    async fn read_more_into(&mut self, into: BytesBuf) -> Result<(usize, BytesBuf), Self::Error>;
144
145    /// Reads an unspecified number of bytes as a new buffer.
146    ///
147    /// The implementation will decide how many bytes to read based on its internal understanding of
148    /// what is optimal for sustained throughput at high efficiency. This may be a fixed size,
149    /// or it may be a variable size based on the current state of the source.
150    ///
151    /// The returned [`BytesBuf`] will contain 0 or more bytes of data on success, with an empty
152    /// buffer indicating that no more bytes can be read from this source.
153    ///
154    /// # Security
155    ///
156    /// **This method is insecure if the side producing the bytes is not trusted**. An attacker
157    /// may trickle data byte-by-byte, consuming a large amount of I/O resources.
158    ///
159    /// Robust code working with untrusted sources should take precautions such as only processing
160    /// read data when either a time or length threshold is reached and reusing buffers that
161    /// have remaining capacity, appending additional data to existing buffers using
162    /// [`read_more_into()`][crate::Read::read_more_into] instead of reserving new buffers
163    /// for each read operation.
164    ///
165    /// # Example
166    ///
167    /// ```
168    /// # testing_aids::execute_or_terminate_process(|| futures::executor::block_on(async {
169    /// # use bytesbuf_io::testing::Null;
170    /// use bytesbuf_io::Read;
171    ///
172    /// # fn get_source() -> Null { Null::new() }
173    /// let mut source = get_source();
174    ///
175    /// let buf = source.read_any().await.unwrap();
176    ///
177    /// println!("first read produced {} bytes of data", buf.len());
178    /// # }));
179    /// ```
180    async fn read_any(&mut self) -> Result<BytesBuf, Self::Error>;
181}