Skip to main content

vortex_io/
limit.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::future::Future;
5use std::pin::Pin;
6use std::sync::Arc;
7use std::task::Context;
8use std::task::Poll;
9use std::task::ready;
10
11use futures::Stream;
12use futures::stream::FuturesUnordered;
13use pin_project_lite::pin_project;
14use tokio::sync::OwnedSemaphorePermit;
15use tokio::sync::Semaphore;
16use tokio::sync::TryAcquireError;
17use vortex_error::VortexExpect;
18
pin_project! {
    /// A [`Future`] wrapper that keeps a reservation of semaphore permits alive for as long as
    /// the wrapped future exists.
    ///
    /// The permits represent the amount of memory the completed value is expected to occupy.
    /// Because the reservation is an `OwnedSemaphorePermit` stored next to the future, it is
    /// released automatically when this wrapper is dropped, whether that happens after the
    /// future completed or because it was cancelled/aborted before finishing.
    struct SizedFut<Fut> {
        // The wrapped future. Marked `#[pin]` so it can be polled through a pinned projection.
        #[pin]
        inner: Fut,
        // Never read; held only so that the permits are returned when this struct is dropped.
        _permits: OwnedSemaphorePermit,
    }
}
31
32impl<Fut: Future> Future for SizedFut<Fut> {
33    type Output = Fut::Output;
34
35    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
36        let this = self.project();
37        let result = ready!(this.inner.poll(cx));
38        Poll::Ready(result)
39    }
40}
41
pin_project! {
    /// A [`Stream`] that can work on several simultaneous requests, capping the amount of memory it
    /// uses at any given point.
    ///
    /// It is meant to serve as a buffer between a producer and consumer of IO requests, with built-in
    /// backpressure that prevents the producer from materializing more than a specified maximum
    /// amount of memory. Each queued future is paired with a reservation of permits taken from a
    /// shared semaphore; the reservation is given back once that future is dropped.
    ///
    /// This crate internally makes use of tokio's [`Semaphore`], and thus is only available with
    /// the `tokio` feature enabled.
    pub struct SizeLimitedStream<Fut> {
        // The futures currently being driven, each bundled with the permits reserved for it.
        #[pin]
        inflight: FuturesUnordered<SizedFut<Fut>>,
        // Remaining capacity, one permit per byte. Held in an `Arc` so that owned permits can be
        // handed out to the queued futures.
        bytes_available: Arc<Semaphore>,
    }
}
58
59impl<Fut> SizeLimitedStream<Fut> {
60    pub fn new(max_bytes: usize) -> Self {
61        Self {
62            inflight: FuturesUnordered::new(),
63            bytes_available: Arc::new(Semaphore::new(max_bytes)),
64        }
65    }
66
67    pub fn bytes_available(&self) -> usize {
68        self.bytes_available.available_permits()
69    }
70}
71
72impl<Fut> SizeLimitedStream<Fut>
73where
74    Fut: Future,
75{
76    /// Push a future into the queue after reserving `bytes` of capacity.
77    ///
78    /// This call may need to wait until there is sufficient capacity available in the stream to
79    /// begin work on this future.
80    pub async fn push(&self, fut: Fut, bytes: usize) {
81        // Attempt to acquire enough permits to begin working on a request that will occupy
82        // `bytes` amount of memory when it completes.
83        // Acquiring the permits is what creates backpressure for the producer.
84        let permits = Arc::clone(&self.bytes_available)
85            .acquire_many_owned(bytes.try_into().vortex_expect("bytes must fit in u32"))
86            .await
87            .unwrap_or_else(|_| unreachable!("pushing to closed semaphore"));
88
89        let sized_fut = SizedFut {
90            inner: fut,
91            _permits: permits,
92        };
93
94        // push into the pending queue
95        self.inflight.push(sized_fut);
96    }
97
98    /// Synchronous push method. This method will attempt to push if there is enough capacity
99    /// to begin work on the future immediately.
100    ///
101    /// If there is not enough capacity, the original future is returned to the caller.
102    pub fn try_push(&self, fut: Fut, bytes: usize) -> Result<(), Fut> {
103        match Arc::clone(&self.bytes_available)
104            .try_acquire_many_owned(bytes.try_into().vortex_expect("bytes must fit in u32"))
105        {
106            Ok(permits) => {
107                let sized_fut = SizedFut {
108                    inner: fut,
109                    _permits: permits,
110                };
111
112                self.inflight.push(sized_fut);
113                Ok(())
114            }
115            Err(acquire_err) => match acquire_err {
116                TryAcquireError::Closed => {
117                    unreachable!("try_pushing to closed semaphore");
118                }
119
120                // No permits available, return the future back to the client so they can
121                // try again.
122                TryAcquireError::NoPermits => Err(fut),
123            },
124        }
125    }
126}
127
128impl<Fut> Stream for SizeLimitedStream<Fut>
129where
130    Fut: Future,
131{
132    type Item = Fut::Output;
133
134    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
135        let this = self.project();
136        match ready!(this.inflight.poll_next(cx)) {
137            None => Poll::Ready(None),
138            Some(result) => {
139                // Permits are automatically returned when the SizedFut is dropped
140                // after being polled to completion by FuturesUnordered
141                Poll::Ready(Some(result))
142            }
143        }
144    }
145}
146
#[cfg(test)]
mod tests {
    use std::future;
    use std::io;
    use std::sync::Arc;

    use bytes::Bytes;
    use futures::FutureExt;
    use futures::StreamExt;
    use futures::future::BoxFuture;

    use crate::limit::SizeLimitedStream;

    /// Build a future that resolves to `len` bytes of `b'a'`.
    async fn make_future(len: usize) -> Bytes {
        "a".as_bytes().iter().copied().cycle().take(len).collect()
    }

    /// Capacity is enforced while requests are outstanding and becomes available again once a
    /// finished item has been taken off the stream.
    #[tokio::test]
    async fn test_size_limit() {
        let mut size_limited = SizeLimitedStream::new(10);
        size_limited.push(make_future(5), 5).await;
        size_limited.push(make_future(5), 5).await;

        // Pushing last request should fail, because we have 10 bytes outstanding.
        assert!(size_limited.try_push(make_future(1), 1).is_err());

        // but, we can pop off a finished work item, and then enqueue.
        assert!(size_limited.next().await.is_some());
        assert!(size_limited.try_push(make_future(1), 1).is_ok());
    }

    /// A future that resolves to an error still returns its permits.
    #[tokio::test]
    async fn test_does_not_leak_permits() {
        let bad_fut: BoxFuture<'static, io::Result<Bytes>> =
            future::ready(Err(io::Error::other("badness"))).boxed();

        let good_fut: BoxFuture<'static, io::Result<Bytes>> =
            future::ready(Ok(Bytes::from("aaaaa"))).boxed();

        let mut size_limited = SizeLimitedStream::new(10);
        size_limited.push(bad_fut, 10).await;

        // attempt to push should fail, as all 10 bytes of capacity is occupied by bad_fut.
        let good_fut = size_limited
            .try_push(good_fut, 5)
            .expect_err("try_push should fail");

        // Even though the result was an error, the 10 bytes of capacity should be returned back to
        // the stream, allowing us to push the next request.
        let next = size_limited.next().await.unwrap();
        assert!(next.is_err());

        assert_eq!(size_limited.bytes_available(), 10);
        assert!(size_limited.try_push(good_fut, 5).is_ok());
    }

    /// A stream with no capacity rejects every non-empty request.
    #[tokio::test]
    async fn test_size_limited_stream_zero_capacity() {
        let stream = SizeLimitedStream::new(0);

        // Should not be able to push anything
        let result = stream.try_push(async { vec![1u8] }, 1);
        assert!(result.is_err());
    }

    /// Dropping the stream while futures are still queued (including one that can never
    /// complete) must hand every reserved permit back to the semaphore.
    #[tokio::test]
    async fn test_size_limited_stream_dropped_future_releases_permits() {
        let stream = SizeLimitedStream::<BoxFuture<'static, Vec<u8>>>::new(10);

        // Keep our own handle to the semaphore so that it can still be inspected after the
        // stream itself has been dropped.
        let semaphore = Arc::clone(&stream.bytes_available);

        // Push a future that will never complete
        stream
            .push(
                Box::pin(async {
                    // This future will be dropped before completion
                    futures::future::pending::<Vec<u8>>().await
                }),
                5,
            )
            .await;

        // Push another future
        stream.push(Box::pin(async { vec![1u8; 3] }), 3).await;

        // We should have 2 bytes available now
        assert_eq!(stream.bytes_available(), 2);
        assert_eq!(semaphore.available_permits(), 2);

        // Drop the stream without consuming the futures
        drop(stream);

        // Every permit reserved by the dropped futures must be back in the semaphore.
        assert_eq!(semaphore.available_permits(), 10);
    }

    /// A single request may use the whole capacity; the capacity is fully restored once the
    /// result has been consumed.
    #[tokio::test]
    async fn test_size_limited_stream_exact_capacity() {
        let mut stream = SizeLimitedStream::<BoxFuture<'static, Vec<u8>>>::new(10);

        // Push exactly the capacity
        stream.push(Box::pin(async { vec![0u8; 10] }), 10).await;

        // Should not be able to push more
        let result = stream.try_push(Box::pin(async { vec![1u8] }), 1);
        assert!(result.is_err());

        // After consuming, should be able to push again
        stream
            .next()
            .await
            .expect("The 10 byte vector ought to be in there!");
        assert_eq!(stream.bytes_available(), 10);

        let result = stream.try_push(Box::pin(async { vec![1u8; 5] }), 5);
        assert!(result.is_ok());
    }

    /// Many small requests are accounted for individually and all capacity comes back once they
    /// have been drained.
    #[tokio::test]
    async fn test_size_limited_stream_multiple_small_pushes() {
        let mut stream = SizeLimitedStream::new(100);

        // Push many small items
        for i in 0..10u8 {
            stream.push(async move { vec![i; 5] }, 5).await;
        }

        // Should have used 50 bytes
        assert_eq!(stream.bytes_available(), 50);

        // Consume all
        let mut count = 0;
        while stream.next().await.is_some() {
            count += 1;
            if count == 10 {
                break;
            }
        }

        assert_eq!(count, 10);
        assert_eq!(stream.bytes_available(), 100);
    }

    /// Requests of a reasonable size go through the `usize` -> `u32` conversion untouched.
    #[test]
    fn test_size_overflow_protection() {
        let stream = SizeLimitedStream::new(100);

        // On 64-bit targets a size above `u32::MAX` is representable as `usize`. Pushing such a
        // size panics with the current implementation. That is documented here rather than
        // asserted, as the behavior may change.

        // Test with reasonable size
        let result = stream.try_push(async { vec![0u8; 50] }, 50);
        assert!(result.is_ok());
    }
}