//! A cache of services.

use super::error;
use futures_core::Stream;
use futures_util::stream::FuturesUnordered;
pub use indexmap::Equivalent;
use indexmap::IndexMap;
use std::future::Future;
use std::hash::Hash;
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::sync::oneshot;
use tower_service::Service;
use tracing::{debug, trace};

/// Drives readiness over a set of services.
///
/// The cache maintains two internal data structures:
///
/// * a set of _pending_ services that have not yet become ready; and
/// * a set of _ready_ services that have previously polled ready.
///
/// As each `S`-typed [`Service`] is added to the cache via [`ReadyCache::push`], it
/// is added to the _pending set_. As [`ReadyCache::poll_pending`] is invoked,
/// pending services are polled and added to the _ready set_.
///
/// [`ReadyCache::call_ready`] (or [`ReadyCache::call_ready_index`]) dispatches a
/// request to the specified service, but panics if the specified service is not
/// in the ready set. The `ReadyCache::check_*` functions can be used to ensure
/// that a service is ready before dispatching a request.
///
/// The ready set can hold services for an arbitrarily long time. During this
/// time, the runtime may process events that invalidate that ready state (for
/// instance, if a keepalive detects a lost connection). In such cases, callers
/// should use [`ReadyCache::check_ready`] (or [`ReadyCache::check_ready_index`])
/// immediately before dispatching a request to ensure that the service has not
/// become unavailable.
///
/// Once `ReadyCache::call_ready*` is invoked, the service is placed back into
/// the _pending_ set to be driven to readiness again.
///
/// When `ReadyCache::check_ready*` returns `false`, it indicates that the
/// specified service is _not_ ready. If an error is returned, this indicates
/// that the service failed and has been removed from the cache entirely.
///
/// [`ReadyCache::evict`] can be used to remove a service from the cache (by key),
/// though the service may not be dropped (if it is currently pending) until
/// [`ReadyCache::poll_pending`] is invoked.
///
/// Note that the by-index accessors are provided to support use cases (like
/// power-of-two-choices load balancing) where the caller does not care to keep
/// track of each service's key. Instead, it needs only to access _some_ ready
/// service. In such a case, it should be noted that calls to
/// [`ReadyCache::poll_pending`] and [`ReadyCache::evict`] may perturb the order of
/// the ready set, so any cached indexes should be discarded after such a call.
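///
/// # Examples
///
/// A minimal sketch of the intended flow, assuming the `ready-cache` and
/// `util` features are enabled (for [`ReadyCache`] and `tower::service_fn`)
/// and that some async runtime drives the returned future:
///
/// ```no_run
/// use std::convert::Infallible;
/// use std::task::Poll;
/// use futures_util::future::poll_fn;
/// use tower::ready_cache::ReadyCache;
/// use tower::service_fn;
///
/// async fn run() -> Result<(), tower::BoxError> {
///     let mut cache: ReadyCache<usize, _, String> = ReadyCache::default();
///
///     // Newly pushed services start out in the pending set.
///     cache.push(0, service_fn(|req: String| async move {
///         Ok::<_, Infallible>(req.len())
///     }));
///
///     // Drive pending services to readiness.
///     poll_fn(|cx| cache.poll_pending(cx)).await?;
///
///     // Readiness may have gone stale, so re-check immediately before
///     // dispatching.
///     let ready = poll_fn(|cx| Poll::Ready(cache.check_ready(cx, &0usize))).await?;
///     if ready {
///         // After the call, the service is moved back to the pending set.
///         let len = cache.call_ready(&0usize, "hello".to_string()).await?;
///         assert_eq!(len, 5);
///     }
///     Ok(())
/// }
/// ```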
#[derive(Debug)]
pub struct ReadyCache<K, S, Req>
where
    K: Eq + Hash,
{
    /// A stream of services that are not yet ready.
    pending: FuturesUnordered<Pending<K, S, Req>>,
    /// An index of cancelation handles for pending services.
    pending_cancel_txs: IndexMap<K, CancelTx>,

    /// Services that have previously become ready. Readiness can become stale,
    /// so a given service should be polled immediately before use.
    ///
    /// The cancelation oneshot is preserved (though unused) while the service is
    /// ready so that it need not be reallocated each time a request is
    /// dispatched.
    ready: IndexMap<K, (S, CancelPair)>,
}

// Safety: This is safe because we do not use `Pin::new_unchecked`.
impl<S, K: Eq + Hash, Req> Unpin for ReadyCache<K, S, Req> {}

type CancelRx = oneshot::Receiver<()>;
type CancelTx = oneshot::Sender<()>;
type CancelPair = (CancelTx, CancelRx);

#[derive(Debug)]
enum PendingError<K, E> {
    Canceled(K),
    Inner(K, E),
}

/// A [`Future`] that becomes satisfied when an `S`-typed service is ready.
///
/// May fail due to cancelation, i.e., if the service is evicted from the cache.
#[derive(Debug)]
struct Pending<K, S, Req> {
    key: Option<K>,
    cancel: Option<CancelRx>,
    ready: Option<S>,
    _pd: std::marker::PhantomData<Req>,
}

// === ReadyCache ===

impl<K, S, Req> Default for ReadyCache<K, S, Req>
where
    K: Eq + Hash,
    S: Service<Req>,
{
    fn default() -> Self {
        Self {
            ready: IndexMap::default(),
            pending: FuturesUnordered::new(),
            pending_cancel_txs: IndexMap::default(),
        }
    }
}

impl<K, S, Req> ReadyCache<K, S, Req>
where
    K: Eq + Hash,
{
    /// Returns the total number of services in the cache.
    pub fn len(&self) -> usize {
        self.ready_len() + self.pending_len()
    }

    /// Returns `true` if there are no services in the cache.
    pub fn is_empty(&self) -> bool {
        self.ready.is_empty() && self.pending.is_empty()
    }

    /// Returns the number of services in the ready set.
    pub fn ready_len(&self) -> usize {
        self.ready.len()
    }

    /// Returns the number of services in the unready set.
    pub fn pending_len(&self) -> usize {
        self.pending.len()
    }

    /// Returns true iff the given key is in the unready set.
    pub fn pending_contains<Q: Hash + Equivalent<K>>(&self, key: &Q) -> bool {
        self.pending_cancel_txs.contains_key(key)
    }

    /// Obtains a reference to a service in the ready set by key.
    pub fn get_ready<Q: Hash + Equivalent<K>>(&self, key: &Q) -> Option<(usize, &K, &S)> {
        self.ready.get_full(key).map(|(i, k, v)| (i, k, &v.0))
    }

    /// Obtains a mutable reference to a service in the ready set by key.
    pub fn get_ready_mut<Q: Hash + Equivalent<K>>(
        &mut self,
        key: &Q,
    ) -> Option<(usize, &K, &mut S)> {
        self.ready
            .get_full_mut(key)
            .map(|(i, k, v)| (i, k, &mut v.0))
    }

    /// Obtains a reference to a service in the ready set by index.
    pub fn get_ready_index(&self, idx: usize) -> Option<(&K, &S)> {
        self.ready.get_index(idx).map(|(k, v)| (k, &v.0))
    }

    /// Obtains a mutable reference to a service in the ready set by index.
    pub fn get_ready_index_mut(&mut self, idx: usize) -> Option<(&mut K, &mut S)> {
        self.ready.get_index_mut(idx).map(|(k, v)| (k, &mut v.0))
    }

    /// Evicts an item from the cache.
    ///
    /// Returns true if a service was marked for eviction.
    ///
    /// Services are dropped from the ready set immediately. Services in the
    /// pending set are marked for cancellation, but [`ReadyCache::poll_pending`]
    /// must be called to cause the service to be dropped.
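    ///
    /// For example, a small sketch of these semantics, assuming a pending
    /// service `svc` and `poll_fn` from `futures-util`:
    ///
    /// ```no_run
    /// # use futures_util::future::poll_fn;
    /// # use tower::ready_cache::ReadyCache;
    /// # async fn example<S>(svc: S)
    /// # where
    /// #     S: tower::Service<()>,
    /// #     S::Error: Into<tower::BoxError>,
    /// # {
    /// let mut cache: ReadyCache<&'static str, S, ()> = ReadyCache::default();
    /// cache.push("a", svc);
    /// assert!(cache.evict(&"a"));
    /// // The pending service is only dropped once poll_pending observes the
    /// // cancelation.
    /// let _ = poll_fn(|cx| cache.poll_pending(cx)).await;
    /// assert_eq!(cache.len(), 0);
    /// # }
    /// ```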
    pub fn evict<Q: Hash + Equivalent<K>>(&mut self, key: &Q) -> bool {
        let canceled = if let Some(c) = self.pending_cancel_txs.swap_remove(key) {
            c.send(()).expect("cancel receiver lost");
            true
        } else {
            false
        };

        self.ready
            .swap_remove_full(key)
            .map(|_| true)
            .unwrap_or(canceled)
    }
}

impl<K, S, Req> ReadyCache<K, S, Req>
where
    K: Clone + Eq + Hash,
    S: Service<Req>,
    S::Error: Into<crate::BoxError>,
{
    /// Pushes a new service onto the pending set.
    ///
    /// The service will be promoted to the ready set as [`poll_pending`] is invoked.
    ///
    /// Note that this does **not** remove services from the ready set. Once the
    /// old service is used, it will be dropped instead of being added back to
    /// the pending set; OR, when the new service becomes ready, it will replace
    /// the prior service in the ready set.
    ///
    /// [`poll_pending`]: crate::ready_cache::cache::ReadyCache::poll_pending
    pub fn push(&mut self, key: K, svc: S) {
        let cancel = oneshot::channel();
        self.push_pending(key, svc, cancel);
    }

    fn push_pending(&mut self, key: K, svc: S, (cancel_tx, cancel_rx): CancelPair) {
        if let Some(c) = self.pending_cancel_txs.insert(key.clone(), cancel_tx) {
            // If there is already a service for this key, cancel it.
            c.send(()).expect("cancel receiver lost");
        }
        self.pending.push(Pending {
            key: Some(key),
            cancel: Some(cancel_rx),
            ready: Some(svc),
            _pd: std::marker::PhantomData,
        });
    }

    /// Polls services pending readiness, adding ready services to the ready set.
    ///
    /// Returns [`Poll::Ready`] when there are no remaining unready services.
    /// [`poll_pending`] should be called again after [`push`] or
    /// [`call_ready_index`] is invoked.
    ///
    /// Failures indicate that an individual pending service failed to become
    /// ready (and has been removed from the cache). In such a case,
    /// [`poll_pending`] should typically be called again to continue driving
    /// pending services to readiness.
    ///
    /// [`poll_pending`]: crate::ready_cache::cache::ReadyCache::poll_pending
    /// [`push`]: crate::ready_cache::cache::ReadyCache::push
    /// [`call_ready_index`]: crate::ready_cache::cache::ReadyCache::call_ready_index
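    ///
    /// For example, one way to drive the whole pending set from async code
    /// while surfacing individual failures (a sketch; `poll_fn` is from
    /// `futures-util`):
    ///
    /// ```no_run
    /// # use futures_util::future::poll_fn;
    /// # async fn drive<S>(cache: &mut tower::ready_cache::ReadyCache<String, S, ()>)
    /// # where
    /// #     S: tower::Service<()>,
    /// #     S::Error: Into<tower::BoxError>,
    /// # {
    /// // An error removes only the failed service, so keep polling until the
    /// // remaining pending services have been driven to readiness.
    /// while let Err(failed) = poll_fn(|cx| cache.poll_pending(cx)).await {
    ///     eprintln!("service failed: {}", failed);
    /// }
    /// # }
    /// ```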
    pub fn poll_pending(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), error::Failed<K>>> {
        loop {
            match Pin::new(&mut self.pending).poll_next(cx) {
                Poll::Pending => return Poll::Pending,
                Poll::Ready(None) => return Poll::Ready(Ok(())),
                Poll::Ready(Some(Ok((key, svc, cancel_rx)))) => {
                    trace!("endpoint ready");
                    let cancel_tx = self.pending_cancel_txs.swap_remove(&key);
                    if let Some(cancel_tx) = cancel_tx {
                        // Keep track of the cancelation so that it need not be
                        // recreated after the service is used.
                        self.ready.insert(key, (svc, (cancel_tx, cancel_rx)));
                    } else {
                        // This should not technically be possible. We must have decided to cancel
                        // a Service (by sending on the CancelTx), yet that same service then
                        // returns Ready. Since polling a Pending _first_ polls the CancelRx, that
                        // _should_ always see our CancelTx send. Yet empirically, that isn't true:
                        //
                        //   https://github.com/tower-rs/tower/issues/415
                        //
                        // So, we instead detect the endpoint as canceled at this point. That
                        // should be fine, since the oneshot is only really there to ensure that
                        // the Pending is polled again anyway.
                        //
                        // We assert that this can't happen in debug mode so that hopefully one day
                        // we can find a test that triggers this reliably.
                        debug_assert!(cancel_tx.is_some());
                        debug!("canceled endpoint removed when ready");
                    }
                }
                Poll::Ready(Some(Err(PendingError::Canceled(_)))) => {
                    debug!("endpoint canceled");
                    // The cancelation handle for this service was already
                    // removed (and signaled) in order to trigger this
                    // cancelation, so there is nothing to clean up here.
                }
                Poll::Ready(Some(Err(PendingError::Inner(key, e)))) => {
                    let cancel_tx = self.pending_cancel_txs.swap_remove(&key);
                    if cancel_tx.is_some() {
                        return Err(error::Failed(key, e.into())).into();
                    } else {
                        // See comment for the same clause under Ready(Some(Ok)).
                        debug_assert!(cancel_tx.is_some());
                        debug!("canceled endpoint removed on error");
                    }
                }
            }
        }
    }

    /// Checks whether the referenced endpoint is ready.
    ///
    /// Returns `true` if the endpoint is ready, or `false` if it is pending or
    /// not present in the ready set. An error is returned if the endpoint
    /// fails, in which case it is removed from the cache.
    pub fn check_ready<Q: Hash + Equivalent<K>>(
        &mut self,
        cx: &mut Context<'_>,
        key: &Q,
    ) -> Result<bool, error::Failed<K>> {
        match self.ready.get_full_mut(key) {
            Some((index, _, _)) => self.check_ready_index(cx, index),
            None => Ok(false),
        }
    }

    /// Checks whether the referenced endpoint is ready.
    ///
    /// If the service is no longer ready, it is moved back into the pending set
    /// and `false` is returned.
    ///
    /// If the service errors, it is removed and dropped and the error is returned.
    pub fn check_ready_index(
        &mut self,
        cx: &mut Context<'_>,
        index: usize,
    ) -> Result<bool, error::Failed<K>> {
        let svc = match self.ready.get_index_mut(index) {
            None => return Ok(false),
            Some((_, (svc, _))) => svc,
        };
        match svc.poll_ready(cx) {
            Poll::Ready(Ok(())) => Ok(true),
            Poll::Pending => {
                // The service became unready; move it back to the pending set.
                let (key, (svc, cancel)) = self
                    .ready
                    .swap_remove_index(index)
                    .expect("invalid ready index");

                // If a new version of this service has been added to the
                // unready set, don't overwrite it.
                if !self.pending_contains(&key) {
                    self.push_pending(key, svc, cancel);
                }

                Ok(false)
            }
            Poll::Ready(Err(e)) => {
                // failed, so drop it.
                let (key, _) = self
                    .ready
                    .swap_remove_index(index)
                    .expect("invalid ready index");
                Err(error::Failed(key, e.into()))
            }
        }
    }

    /// Calls a ready service by key.
    ///
    /// # Panics
    ///
    /// If the specified key does not exist in the ready set.
    pub fn call_ready<Q: Hash + Equivalent<K>>(&mut self, key: &Q, req: Req) -> S::Future {
        let (index, _, _) = self
            .ready
            .get_full_mut(key)
            .expect("check_ready was not called");
        self.call_ready_index(index, req)
    }

    /// Calls a ready service by index.
    ///
    /// # Panics
    ///
    /// If the specified index is out of range.
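    ///
    /// A sketch of index-based dispatch (e.g. for a power-of-two-choices
    /// balancer); the helper and its first-ready selection are illustrative
    /// only:
    ///
    /// ```no_run
    /// use std::hash::Hash;
    /// use std::task::Context;
    /// use tower::ready_cache::{error, ReadyCache};
    /// use tower::Service;
    ///
    /// // Dispatches `req` to the first ready service, if any.
    /// fn call_first_ready<K, S, Req>(
    ///     cache: &mut ReadyCache<K, S, Req>,
    ///     cx: &mut Context<'_>,
    ///     req: Req,
    /// ) -> Result<Option<S::Future>, error::Failed<K>>
    /// where
    ///     K: Clone + Eq + Hash,
    ///     S: Service<Req>,
    ///     S::Error: Into<tower::BoxError>,
    /// {
    ///     for index in 0..cache.ready_len() {
    ///         // check_ready_index may move a service back to the pending set
    ///         // (or remove it on error), perturbing indexes; dispatch
    ///         // immediately after a successful check.
    ///         if cache.check_ready_index(cx, index)? {
    ///             return Ok(Some(cache.call_ready_index(index, req)));
    ///         }
    ///     }
    ///     Ok(None)
    /// }
    /// ```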
    pub fn call_ready_index(&mut self, index: usize, req: Req) -> S::Future {
        let (key, (mut svc, cancel)) = self
            .ready
            .swap_remove_index(index)
            .expect("check_ready_index was not called");

        let fut = svc.call(req);

        // If a new version of this service has been added to the
        // unready set, don't overwrite it.
        if !self.pending_contains(&key) {
            self.push_pending(key, svc, cancel);
        }

        fut
    }
}

// === Pending ===

// Safety: This is safe because we do not use `Pin::new_unchecked`.
impl<K, S, Req> Unpin for Pending<K, S, Req> {}

impl<K, S, Req> Future for Pending<K, S, Req>
where
    S: Service<Req>,
{
    type Output = Result<(K, S, CancelRx), PendingError<K, S::Error>>;

    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        let mut fut = self.cancel.as_mut().expect("polled after complete");
        if let Poll::Ready(r) = Pin::new(&mut fut).poll(cx) {
            assert!(r.is_ok(), "cancel sender lost");
            let key = self.key.take().expect("polled after complete");
            return Err(PendingError::Canceled(key)).into();
        }

        match self
            .ready
            .as_mut()
            .expect("polled after ready")
            .poll_ready(cx)
        {
            Poll::Pending => Poll::Pending,
            Poll::Ready(Ok(())) => {
                let key = self.key.take().expect("polled after complete");
                let cancel = self.cancel.take().expect("polled after complete");
                Ok((key, self.ready.take().expect("polled after ready"), cancel)).into()
            }
            Poll::Ready(Err(e)) => {
                let key = self.key.take().expect("polled after complete");
                Err(PendingError::Inner(key, e)).into()
            }
        }
    }
}