bytesbuf_io/read.rs
1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4use std::fmt::Debug;
5
6use bytesbuf::BytesBuf;
7use bytesbuf::mem::{HasMemory, Memory};
8
9/// Allows for reading of bytes.
10///
11/// Only supports asynchronous access, exposing the read byte sequences as `bytesbuf::BytesBuf`.
12///
13/// # Ownership
14///
15/// The methods on this trait accept `&mut self` and take an exclusive reference to the source for
16/// the duration of the operation. This implies that only one read operation can be concurrently
17/// executed on the object.
18///
19/// # Memory management for efficient I/O
20///
21/// For optimal efficiency when performing I/O, reads should be performed into memory optimized
22/// for the underlying I/O endpoint. This is achieved by reserving memory from the implementation's
23/// memory provider before performing the read operation.
24///
25/// There are three ways to ensure you are using memory suitable for optimally efficient I/O:
26///
27/// 1. If you call methods that do not accept a `BytesBuf` (such as `read_any()`), the
28/// implementation will reserve memory from its memory provider internally. This is the simplest way
29/// to perform reads but only a limited API surface is available in this mode.
30/// 2. You may call [`Memory::reserve()`][2] on the implementation to reserve memory from its memory
31/// provider explicitly. This allows you to control the memory allocation more finely and
32/// potentially reuse existing buffers, improving efficiency.
33/// 3. You may sometimes want to call `reserve()` at certain times when Rust borrowing rules do
34/// not allow you to call it directly on the implementation because it has already been borrowed.
35/// In this case, you can obtain an independent reference to the memory provider first via
36/// [`HasMemory::memory()`][1], which allows you to bypass the need to borrow the implementing object itself.
37///
38/// Some implementations do not perform real I/O and only move data around in memory. Such
39/// implementations typically do not have any special memory requirements and will operate
40/// with the same efficiency regardless of which buffers the data is in. Any relaxed behaviors
41/// like this will typically be described in the implementation's API documentation.
42///
43/// # Thread safety
44///
45/// This trait requires `Send` from both the implementation and any returned futures.
46///
47/// [1]: bytesbuf::mem::HasMemory::memory
48/// [2]: bytesbuf::mem::Memory::reserve
49#[trait_variant::make(Send)]
50pub trait Read: HasMemory + Memory + Debug {
51 /// Type used to signal errors by the implementation of this trait.
52 type Error: std::error::Error + Send + Sync + 'static;
53
54 /// Reads at most `len` bytes into the provided buffer.
55 ///
56 /// It is not necessary for `into` to be empty - the buffer may already have some
57 /// bytes of data in it (e.g. from a previous read).
58 ///
59 /// The buffer will be extended with additional memory capacity
60 /// if it does not have enough remaining capacity to fit `len` additional bytes.
61 ///
62 /// Returns a tuple of the number of bytes read and the updated buffer.
63 ///
64 /// The returned [`BytesBuf`] will have 0 or more bytes of data appended to it on success,
65 /// with 0 appended bytes indicating that no more bytes can be read from this source. Any
66 /// data that was already in the buffer will remain untouched.
67 ///
68 /// # Example
69 ///
70 /// ```
71 /// # testing_aids::execute_or_terminate_process(|| futures::executor::block_on(async {
72 /// # use bytesbuf_io::testing::Null;
73 /// use bytesbuf::mem::Memory;
74 /// use bytesbuf_io::Read;
75 ///
76 /// # fn get_source() -> Null { Null::new() }
77 /// let mut source = get_source();
78 ///
79 /// let buf = source.reserve(100);
80 ///
81 /// let (bytes_read, mut buf) = source.read_at_most_into(10, buf).await.unwrap();
82 ///
83 /// assert!(bytes_read <= 10);
84 ///
85 /// let bytes = buf.consume_all();
86 /// # }));
87 /// ```
88 async fn read_at_most_into(&mut self, len: usize, into: BytesBuf) -> Result<(usize, BytesBuf), Self::Error>;
89
90 /// Reads an unspecified number of bytes into the provided buffer.
91 ///
92 /// The implementation will decide how many bytes to read based on its internal understanding of
93 /// what is optimal for sustained throughput at high efficiency. This may be a fixed size,
94 /// or it may be a variable size based on the current state of the source.
95 ///
96 /// It is not necessary for `into` to be empty - the buffer may already have some
97 /// bytes of data in it (e.g. from a previous read).
98 ///
99 /// The buffer will be extended with additional memory capacity
100 /// if it does not have enough remaining capacity to fit `len` additional bytes.
101 ///
102 /// Returns a tuple of the number of bytes read and the updated buffer.
103 ///
104 /// The returned [`BytesBuf`] will have 0 or more bytes of data appended to it on success,
105 /// with 0 appended bytes indicating that no more bytes can be read from this source. Any
106 /// data that was already in the buffer will remain untouched.
107 ///
108 /// # Example
109 ///
110 /// ```
111 /// # testing_aids::execute_or_terminate_process(|| futures::executor::block_on(async {
112 /// # use bytesbuf_io::testing::Null;
113 /// use bytesbuf_io::Read;
114 ///
115 /// # fn get_source() -> Null { Null::new() }
116 /// let mut source = get_source();
117 ///
118 /// let mut buf = source.read_any().await.unwrap();
119 ///
120 /// if buf.len() == 0 {
121 /// println!("Source ended immediately - nothing we can do.");
122 /// return;
123 /// }
124 ///
125 /// loop {
126 /// // We want at least 1 MB of data, read in whatever sized pieces the I/O endpoint
127 /// // considers optimal.
128 /// if buf.len() >= 1024 * 1024 {
129 /// println!("Got 1 MB of data");
130 /// break;
131 /// }
132 ///
133 /// let (bytes_read, new_buf) = source.read_more_into(buf).await.unwrap();
134 /// buf = new_buf;
135 ///
136 /// if bytes_read == 0 {
137 /// println!("Source ended - no more data before we reached 1 MB");
138 /// break;
139 /// }
140 /// }
141 /// # }));
142 /// ```
143 async fn read_more_into(&mut self, into: BytesBuf) -> Result<(usize, BytesBuf), Self::Error>;
144
145 /// Reads an unspecified number of bytes as a new buffer.
146 ///
147 /// The implementation will decide how many bytes to read based on its internal understanding of
148 /// what is optimal for sustained throughput at high efficiency. This may be a fixed size,
149 /// or it may be a variable size based on the current state of the source.
150 ///
151 /// The returned [`BytesBuf`] will have 0 or more bytes of data appended to it on success,
152 /// with 0 appended bytes indicating that no more bytes can be read from this source.
153 ///
154 /// # Security
155 ///
156 /// **This method is insecure if the side producing the bytes is not trusted**. An attacker
157 /// may trickle data byte-by-byte, consuming a large amount of I/O resources.
158 ///
159 /// Robust code working with untrusted sources should take precautions such as only processing
160 /// read data when either a time or length threshold is reached and reusing buffers that
161 /// have remaining capacity, appending additional data to existing buffers using
162 /// [`read_more_into()`][crate::Read::read_more_into] instead of reserving new buffers
163 /// for each read operation.
164 ///
165 /// # Example
166 ///
167 /// ```
168 /// # testing_aids::execute_or_terminate_process(|| futures::executor::block_on(async {
169 /// # use bytesbuf_io::testing::Null;
170 /// use bytesbuf_io::Read;
171 ///
172 /// # fn get_source() -> Null { Null::new() }
173 /// let mut source = get_source();
174 ///
175 /// let buf = source.read_any().await.unwrap();
176 ///
177 /// println!("first read produced {} bytes of data", buf.len());
178 /// # }));
179 /// ```
180 async fn read_any(&mut self) -> Result<BytesBuf, Self::Error>;
181}