// fastly 0.13.0 - Docs.rs

//! Internal pending-request state machine for guest-side HTTP caching.
//!
//! A public `PendingRequest` may represent either a simple host pending request or a cache
//! transaction that still needs to advance through lookup, collapse, revalidation, and candidate
//! application steps. This module keeps that orchestration private while exposing a single
//! poll/wait surface to the rest of the request API, providing this functionality in a
//! backwards-compatible way (such that async usage of HTTP cache APIs still gives you what
//! looks like a `PendingRequest`, despite being a state machine under the hood).

use crate::convert::{ToHeaderName, ToHeaderValue};
use crate::handle::BodyHandle;
use crate::http::{CandidateResponse, HeaderName, HeaderValue};
use crate::{async_io, Request, Response};
use fastly_shared::CacheOverride;
use fastly_sys::fastly_cache::CacheLookupState;
use fastly_sys::fastly_http_req::PendingResponseKind;
use std::task::Poll;

use super::super::cache::{self, HttpCacheHandle};
use super::super::{AfterSend, SendError, SendErrorCause};
use super::handle::{PendingRequestHandle, PollHandleResult};

/// Best-effort background completion for stale-while-revalidate responses.
///
/// A value of this type is attached to a stale cached response when the request that received
/// it is also responsible for refreshing the cache entry. All of the work happens in this type's
/// `Drop` implementation, so letting the value go out of scope is what completes the fetch.
#[derive(Debug)]
pub(crate) struct BackgroundRevalidation {
    /// The leader backend fetch started during stale-while-revalidate.
    ///
    /// Dropping this value finishes the fetch, applies the resulting cache candidate in the
    /// background, and then discards the response.
    ///
    /// The drop handler takes the state out of this slot, leaving `None` behind.
    pending: Option<AwaitBackendState>,
}

/// Internal state machine backing a `PendingRequest`.
///
/// The machine is driven through `poll()` (nonblocking) or `wait()` (blocking); both advance
/// the private `State` until a response or an error is available.
#[derive(Debug)]
pub(super) struct PendingRequestStateMachine {
    /// The step this pending request is currently at.
    state: State,
}

// The type-level documentation for `State` is pulled in from `state_machine_diagram.md`.
#[doc=include_str!("state_machine_diagram.md")]
#[derive(Debug)]
enum State {
    /// A plain host-backed pending request with no guest-side cache orchestration remaining.
    ///
    /// Polling takes the handle out of this slot, polls it, and stores the returned handle back
    /// while the request is still pending.
    Direct(PendingRequestHandle),
    /// Waiting for the cache lookup / collapse decision to become ready.
    ///
    /// Once ready, this either yields a response immediately or is replaced by `AwaitBackend`
    /// or `Direct`.
    AwaitCache(AwaitCacheState),
    /// Waiting for the backend response that will populate or update cache.
    ///
    /// The result of polling this state is returned to the caller as-is; it never transitions
    /// to another state.
    AwaitBackend(AwaitBackendState),
}

/// A single deferred edit to a response header.
///
/// Edits are recorded while no response (or host pending request) exists to receive them, and
/// are replayed later by `PendingHeaderOps`.
#[derive(Debug)]
enum PendingHeaderOp {
    /// Replayed through `set_header` / `set_response_header` with the given name and value.
    Insert(HeaderName, HeaderValue),
    /// Replayed through `append_header` / `append_response_header` with the given name and value.
    Append(HeaderName, HeaderValue),
    /// Replayed through `remove_header` / `remove_response_header` for the given name.
    Remove(HeaderName),
}

/// Queue of deferred header edits, each tagged with the kind of response it targets.
///
/// Entries are pushed in call order and replayed in that same order.
#[derive(Debug, Default)]
struct PendingHeaderOps(Vec<(PendingResponseKind, PendingHeaderOp)>);

impl PendingHeaderOps {
    fn set_header(&mut self, name: HeaderName, value: HeaderValue, target: PendingResponseKind) {
        self.0.push((target, PendingHeaderOp::Insert(name, value)));
    }

    fn append_header(&mut self, name: HeaderName, value: HeaderValue, target: PendingResponseKind) {
        self.0.push((target, PendingHeaderOp::Append(name, value)));
    }

    fn remove_header(&mut self, name: HeaderName, target: PendingResponseKind) {
        self.0.push((target, PendingHeaderOp::Remove(name)));
    }

    fn apply_response(&self, resp: &mut Response) {
        for (target, op) in self.0.as_slice() {
            if *target == PendingResponseKind::Error {
                continue;
            }

            match op {
                PendingHeaderOp::Insert(name, value) => {
                    resp.set_header(name, value);
                }
                PendingHeaderOp::Append(name, value) => {
                    resp.append_header(name, value);
                }
                PendingHeaderOp::Remove(name) => {
                    let _ = resp.remove_header(name);
                }
            }
        }
    }

    fn apply_handle(&self, pending: &PendingRequestHandle) {
        for (target, op) in self.0.as_slice() {
            match op {
                PendingHeaderOp::Insert(name, value) => {
                    pending.set_response_header(name, value, *target);
                }
                PendingHeaderOp::Append(name, value) => {
                    pending.append_response_header(name, value, *target);
                }
                PendingHeaderOp::Remove(name) => {
                    pending.remove_response_header(name, *target);
                }
            }
        }
    }

    fn take(&mut self) -> Self {
        std::mem::take(self)
    }
}

/// State for a pending cache lookup / collapse decision.
///
/// This is the starting state for cacheable requests. It holds what is needed to return a cached
/// response, start a leader fetch against origin, or fall back to a direct request.
#[derive(Debug)]
struct AwaitCacheState {
    /// The in-flight cache transaction for this request.
    cache_handle: HttpCacheHandle,
    /// The original request, retained so we can either pass it through directly or derive the
    /// suggested backend request if this request becomes the collapse leader.
    req: Box<Request>,
    /// Backend used for any eventual origin fetch.
    backend_name: String,
    /// Pending header changes to put on the response after it's been cached.
    ///
    /// These are recorded here because no host pending request exists yet. They are later
    /// replayed onto a cached response, handed to the backend-fetch state, or forwarded to the
    /// host pending request when the lookup falls back to a direct send.
    pending_header_ops: PendingHeaderOps,
}

/// State for a cache-transaction leader waiting on an origin fetch.
#[derive(Debug)]
struct AwaitBackendState {
    /// The cache transaction to be completed once the backend response is available.
    ///
    /// Taken out while a cache candidate is being built, and put back if candidate construction
    /// or the `after_send` hook fails.
    cache_handle: HttpCacheHandle,
    /// The host pending request for the backend fetch itself.
    pending_req_handle: PendingRequestHandle,
    /// Optional hook to run after the backend response is available but before it is committed.
    after_send: Option<AfterSend>,
    /// Final cache override computed after `before_send` runs on the suggested backend request.
    cache_override: CacheOverride,
    /// Whether stale-if-error fallback is allowed for this leader fetch.
    usable_if_error: bool,
    /// Pending header changes to put on the response after it's been cached.
    ///
    /// Replayed onto the response only when polling this state produces a successful response.
    pending_header_ops: PendingHeaderOps,
}

/// The next action to take once a cache lookup has become ready.
///
/// Produced by `AwaitCacheState::poll` and consumed by `PendingRequestStateMachine::poll`, which
/// either returns the response or installs the carried value as the next state.
enum LookupStep {
    /// A cached response can be returned immediately.
    //
    // Boxed due to the large size.
    Immediate(Box<Response>),
    /// The request became the collapse leader and must await a backend response.
    AwaitBackend(AwaitBackendState),
    /// Cache population is disabled for this transaction, so the original request should bypass
    /// cache and go directly to origin.
    Direct(PendingRequestHandle),
}

/// Start the cache transaction for a request that is going through guest-side caching.
///
/// The request's override cache key (if any) is handed to the lookup and then cleared from the
/// request metadata. Any lookup failure is converted into a `SendErrorCause`.
fn begin_lookup(req: &mut Request, backend: &str) -> Result<HttpCacheHandle, SendErrorCause> {
    let override_key = req.get_override_cache_key();
    let lookup_options = cache::LookupOptions {
        override_key,
        backend_name: backend.to_owned(),
    };

    // The key has been captured above; clear it from the metadata so that it's not inserted as
    // a header (which is used for host-side caching mode).
    req.metadata.override_cache_key = None;

    let cache_handle = cache::transaction_lookup(req.lazy_handle.get_handle(), &lookup_options)?;
    Ok(cache_handle)
}

impl PendingRequestStateMachine {
    /// Create the trivial state machine variant for direct host-backed pending requests.
    pub(super) fn new(handle: PendingRequestHandle) -> Self {
        Self {
            state: State::Direct(handle),
        }
    }

    /// Create a pending request that may require guest-side cache orchestration to complete.
    ///
    /// Non-cacheable requests fall back to the direct host async path immediately. Cacheable
    /// requests begin with a cache lookup and may later transition to a backend fetch or a direct
    /// passthrough request depending on the lookup result.
    pub(super) fn with_guest_caching(
        mut req: Request,
        backend_name: &str,
    ) -> Result<Self, SendErrorCause> {
        if !req.is_cacheable() {
            let handle = req
                .take_request_handle()
                .send_async_without_caching(req.take_body_handle(), backend_name)?;
            return Ok(Self::new(handle));
        }

        Ok(Self {
            state: State::AwaitCache(AwaitCacheState {
                cache_handle: begin_lookup(&mut req, backend_name)?,
                req: Box::new(req),
                backend_name: backend_name.to_owned(),
                pending_header_ops: PendingHeaderOps::default(),
            }),
        })
    }

    pub(super) fn set_response_header(
        &mut self,
        name: impl ToHeaderName,
        value: impl ToHeaderValue,
        target: PendingResponseKind,
    ) {
        match &mut self.state {
            State::AwaitCache(state) => {
                state
                    .pending_header_ops
                    .set_header(name.into_owned(), value.into_owned(), target);
            }
            State::AwaitBackend(state) => {
                state
                    .pending_header_ops
                    .set_header(name.into_owned(), value.into_owned(), target);
            }
            State::Direct(handle) => {
                handle.set_response_header(name, value, target);
            }
        }
    }

    pub(super) fn append_response_header(
        &mut self,
        name: impl ToHeaderName,
        value: impl ToHeaderValue,
        target: PendingResponseKind,
    ) {
        match &mut self.state {
            State::AwaitCache(state) => {
                state.pending_header_ops.append_header(
                    name.into_owned(),
                    value.into_owned(),
                    target,
                );
            }
            State::AwaitBackend(state) => {
                state.pending_header_ops.append_header(
                    name.into_owned(),
                    value.into_owned(),
                    target,
                );
            }
            State::Direct(handle) => {
                handle.append_response_header(name, value, target);
            }
        }
    }

    pub(super) fn remove_response_header(
        &mut self,
        name: impl ToHeaderName,
        target: PendingResponseKind,
    ) {
        match &mut self.state {
            State::AwaitCache(state) => {
                state
                    .pending_header_ops
                    .remove_header(name.into_owned(), target);
            }
            State::AwaitBackend(state) => {
                state
                    .pending_header_ops
                    .remove_header(name.into_owned(), target);
            }
            State::Direct(handle) => {
                handle.remove_response_header(name, target);
            }
        }
    }

    pub(super) fn into_direct_handle(mut self) -> Option<PendingRequestHandle> {
        match &mut self.state {
            State::Direct(handle) => {
                let handle = handle.take();
                if handle.is_invalid() {
                    None
                } else {
                    Some(handle)
                }
            }
            State::AwaitCache(_) | State::AwaitBackend(_) => None,
        }
    }

    pub(super) fn wait_handle(&self) -> u32 {
        match &self.state {
            State::Direct(handle) => handle.as_u32(),
            State::AwaitCache(state) => state.cache_handle.as_abi(),
            State::AwaitBackend(state) => state.pending_req_handle.as_u32(),
        }
    }

    /// Block until this state machine yields a terminal response or error.
    ///
    /// `poll()` remains the source of truth for all immediate guest-side transitions. This helper
    /// only wraps that nonblocking progression in the current host wait loop.
    pub(super) fn wait(&mut self) -> Result<Response, SendErrorCause> {
        loop {
            match self.poll() {
                Poll::Ready(Ok(resp)) => return Ok(resp),
                Poll::Ready(Err(err)) => return Err(err),
                Poll::Pending => {
                    // Once `poll()` returns `Pending`, all guest-side work for the current turn has
                    // been exhausted. The selected handle therefore represents the next host event
                    // required to make further progress.
                    async_io::select(&[self.wait_handle()])
                        .map_err(|status| SendErrorCause::InternalError(Some(status)))?;
                }
            }
        }
    }

    /// Advance this pending request as far as possible without blocking.
    ///
    /// The loop is intentional: cache-related states can transition immediately into another state
    /// without needing a fresh host readiness event. Callers only observe `Pending` when a new host
    /// event is actually required.
    pub(super) fn poll(&mut self) -> Poll<Result<Response, SendErrorCause>> {
        loop {
            match &mut self.state {
                State::Direct(handle) => match handle.take().poll() {
                    PollHandleResult::Pending(next_handle) => {
                        *handle = next_handle;
                        return Poll::Pending;
                    }
                    PollHandleResult::Done(Ok((resp, body))) => {
                        return Poll::Ready(Ok(Response::from((resp, body))));
                    }
                    PollHandleResult::Done(Err(err)) => return Poll::Ready(Err(err)),
                },
                State::AwaitCache(state) => match state.poll() {
                    Poll::Pending => return Poll::Pending,
                    Poll::Ready(Ok(LookupStep::Immediate(mut resp))) => {
                        // Apply the pending headers to this cached response:
                        state.pending_header_ops.apply_response(&mut resp);

                        return Poll::Ready(Ok(*resp));
                    }
                    Poll::Ready(Ok(LookupStep::AwaitBackend(next))) => {
                        self.state = State::AwaitBackend(next);
                    }
                    Poll::Ready(Ok(LookupStep::Direct(handle))) => {
                        self.state = State::Direct(handle);
                    }
                    Poll::Ready(Err(err)) => return Poll::Ready(Err(err)),
                },
                State::AwaitBackend(state) => return state.poll(),
            }
        }
    }

    /// Advance this pending request until we are able to handoff something to send
    /// to the downstream client:
    pub(super) fn send_to_client(mut self, finalizer: super::Finalizer) -> Result<(), SendError> {
        loop {
            // If this is direct pass or hit-for-pass, then we can handoff
            // the `PendingRequestHandle` itself to the host and return:
            if let State::Direct(handle) = &mut self.state {
                finalizer.into_pending(handle.take()).send_to_client();
                return Ok(());
            }

            // Otherwise, wait for something to change before checking again:
            if let Err(status) = async_io::select(&[self.wait_handle()]) {
                let cause = SendErrorCause::InternalError(Some(status));
                let err = finalizer.into_error(cause);
                return Err(err);
            }

            // Then, verify there's no guest-side work to be advanced, and
            // that we don't already have a response in hand (e.g. one that's
            // been cached):
            match self.poll() {
                Poll::Ready(Ok(resp)) => {
                    finalizer.into_response(resp).send_to_client();
                    return Ok(());
                }
                Poll::Ready(Err(cause)) => {
                    return Err(finalizer.into_error(cause));
                }
                Poll::Pending => {
                    // Nothing for us to do, loop and check for handoff.
                }
            }
        }
    }
}

impl AwaitCacheState {
    /// Advance the cache lookup state without blocking.
    ///
    /// Returns `Poll::Pending` while the cache handle is not ready. Once it is ready, the result
    /// is one of:
    ///
    /// - `LookupStep::Immediate`: a usable cached (possibly stale) response. The queued header
    ///   edits remain in `self.pending_header_ops` so the caller can replay them onto it.
    /// - `LookupStep::AwaitBackend`: this request must refresh the cache via an origin fetch.
    /// - `LookupStep::Direct`: caching is not in effect, so the original request was sent
    ///   directly and the queued header edits were forwarded to its host handle.
    ///
    /// # Errors
    ///
    /// Yields a `SendErrorCause` when readiness cannot be queried, when the cache transaction
    /// itself fails, or when an origin fetch cannot be started and no stale fallback applies.
    fn poll(&mut self) -> Poll<Result<LookupStep, SendErrorCause>> {
        match async_io::is_ready(self.cache_handle.as_abi()) {
            Ok(false) => return Poll::Pending,
            Ok(true) => {}
            Err(status) => {
                return Poll::Ready(Err(SendErrorCause::InternalError(Some(status))));
            }
        };

        let lookup_state = match self.cache_handle.wait() {
            Ok(state) => state,
            Err(err) => return Poll::Ready(Err(err.into())),
        };

        // A "usable" response may be fresh, within stale-while-revalidate, or within
        // stale-if-error after some previous leader completed a revalidation attempt.
        if let Ok(mut resp) = self.cache_handle.get_found_response(true) {
            // During stale-while-revalidate, we may be the collapse winner responsible for kicking
            // off a background revalidation while still returning the stale response immediately.
            if self.cache_handle.must_insert_or_update() {
                // Starting the fetch is best-effort here: if it fails we still return the stale
                // response, just without a background revalidation attached.
                resp.background_revalidation =
                    self.start_backend_fetch(false).ok().map(|mut leader| {
                        // `start_backend_fetch` moves the queued header edits into the backend
                        // state, but a background leader never hands a response back to the
                        // caller. Move the edits back so the caller can still replay them onto
                        // the stale response returned below; otherwise they would be lost.
                        self.pending_header_ops = leader.pending_header_ops.take();
                        BackgroundRevalidation {
                            pending: Some(leader),
                        }
                    });
            }
            if lookup_state.contains(CacheLookupState::USABLE_IF_ERROR) {
                // This stale-if-error response was selected after collapse; annotate the masked error.
                resp.masked_error = Some(SendErrorCause::RequestCollapse);
            }
            return Poll::Ready(Ok(LookupStep::Immediate(Box::new(resp))));
        }

        // get_found_response returned an error.
        // This doesn't mean "the cache transaction was a failure" (remember .wait() was OK),
        // but it means we don't have a response we can use right now.
        //
        // We'll have to go to the origin to get that response; either to fulfill our obligation
        // to freshen the cache, or because the cache contains a "skip caching" response.

        if self.cache_handle.must_insert_or_update() {
            // We've been told we need to freshen the cache.
            let usable_if_error = lookup_state.contains(CacheLookupState::USABLE_IF_ERROR);
            return Poll::Ready(match self.start_backend_fetch(usable_if_error) {
                Ok(state) => Ok(LookupStep::AwaitBackend(state)),
                Err(err) if usable_if_error => {
                    // The leader failed before producing a usable candidate, but stale-if-error is
                    // available. Tell the transaction to release the stale response instead.
                    //
                    // A failed `start_backend_fetch` leaves both the cache handle and the queued
                    // header edits in `self`, so they are still available here and to the caller.
                    self.cache_handle.transaction_choose_stale()?;
                    let mut response = self.cache_handle.get_found_response(false)?;
                    response.masked_error = Some(err);
                    Ok(LookupStep::Immediate(Box::new(response)))
                }
                Err(err) => Err(err),
            });
        }

        // We didn't have a "found response" in the cache... and we were told we don't need to
        // update the cache (!must_insert_or_update).
        //
        // If caching and request collapsing were available for this request, we'd either:
        // - Block for someone else to complete the request, and get their found response from
        //   get_found_response
        // - Get the obligation to refresh the cache (must_insert_or_update)
        //
        // But we didn't get_found_response, and we didn't get either of those signals.
        // By process of elimination, this means request collapsing/caching is disabled for this
        // request.
        //
        // At this point we jettison the cache machinery and continue as a plain direct pending
        // request against origin.
        let handle = self
            .req
            .take_request_handle()
            .send_async_without_caching(self.req.take_body_handle(), &self.backend_name)
            // A host pending request now exists, so forward the queued header edits to it.
            .inspect(|h| self.pending_header_ops.apply_handle(h))?;
        Poll::Ready(Ok(LookupStep::Direct(handle)))
    }

    /// Transition from a completed cache lookup into an async backend fetch.
    ///
    /// The cache transaction provides the suggested backend request. We run `before_send` against
    /// that request, capture the final cache override, and then begin the uncached origin fetch.
    /// Ownership of the cache transaction only moves into the backend state once all setup
    /// succeeds.
    ///
    /// On success the queued header edits are moved into the returned state as well, leaving
    /// `self.pending_header_ops` empty. A caller that keeps returning a response from this state
    /// (the stale-while-revalidate path) must move them back.
    ///
    /// # Errors
    ///
    /// Returns a `SendErrorCause` if the suggested request cannot be obtained, if `before_send`
    /// fails, or if the origin fetch cannot be started. In all of these cases the cache handle
    /// and the queued header edits stay in `self`.
    fn start_backend_fetch(
        &mut self,
        usable_if_error: bool,
    ) -> Result<AwaitBackendState, SendErrorCause> {
        let mut backend_req = Request::from(self.cache_handle.get_suggested_backend_request()?)
            .with_body(self.req.take_body())
            .with_metadata(self.req.metadata.clone());
        self.req.metadata.invoke_before_send(&mut backend_req)?;

        // Read the override only after `before_send` has had a chance to change it.
        let cache_override = backend_req.metadata.cache_override.clone();
        let (req_handle, req_body_handle) = backend_req.into_handles();
        let pending_req_handle = req_handle.send_async_without_caching(
            req_body_handle.unwrap_or_else(BodyHandle::new),
            &self.backend_name,
        )?;

        // Every fallible step is done; only now give up the cache handle and header queue.
        Ok(AwaitBackendState {
            cache_handle: self.cache_handle.take(),
            pending_req_handle,
            after_send: self.req.metadata.after_send.clone(),
            pending_header_ops: self.pending_header_ops.take(),
            cache_override,
            usable_if_error,
        })
    }
}

impl AwaitBackendState {
    /// Drive the cache-leader backend fetch to a final response, or report that it is still
    /// pending.
    ///
    /// Turning the backend response into a cache candidate is entirely `poll_candidate()`'s job.
    /// This method layers the cache-transaction policy on top: on success the candidate is
    /// applied and streamed back; on failure the stale response is substituted when
    /// stale-if-error is permitted. Queued header edits are replayed onto any successful
    /// response.
    fn poll(&mut self) -> Poll<Result<Response, SendErrorCause>> {
        let mut outcome = match self.poll_candidate() {
            Poll::Pending => return Poll::Pending,
            Poll::Ready(Ok(candidate)) => candidate.apply_and_stream_back(),
            Poll::Ready(Err(fetch_err)) => {
                if !self.usable_if_error {
                    Err(fetch_err)
                } else {
                    // Ask the transaction for the stale response and remember the error it hides.
                    match self.cache_handle.transaction_choose_stale() {
                        Err(err) => Err(SendErrorCause::from(err)),
                        Ok(_) => match self.cache_handle.get_found_response(false) {
                            Err(err) => Err(SendErrorCause::from(err)),
                            Ok(mut stale) => {
                                stale.masked_error = Some(fetch_err);
                                Ok(stale)
                            }
                        },
                    }
                }
            }
        };

        // Queued header edits only make sense on a response we are actually returning.
        if let Ok(resp) = &mut outcome {
            self.pending_header_ops.apply_response(resp);
        }

        Poll::Ready(outcome)
    }

    /// Drive the backend fetch until a cache candidate or a terminal error is available.
    ///
    /// When building the candidate or running `after_send` fails, the cache handle is put back
    /// into `self` before the error is returned, so the surrounding state can still fall back to
    /// stale-if-error or abandon the transaction.
    fn poll_candidate(&mut self) -> Poll<Result<CandidateResponse, SendErrorCause>> {
        let (resp_handle, resp_body_handle) = match self.pending_req_handle.take().poll() {
            PollHandleResult::Pending(still_pending) => {
                self.pending_req_handle = still_pending;
                return Poll::Pending;
            }
            PollHandleResult::Done(Err(err)) => return Poll::Ready(Err(err)),
            PollHandleResult::Done(Ok(handles)) => handles,
        };

        let built = CandidateResponse::new(
            self.cache_handle.take(),
            &self.cache_override,
            resp_handle,
            resp_body_handle,
        );
        let mut candidate = match built {
            Ok(candidate) => candidate,
            Err((err, cache_handle)) => {
                // Construction failed: reclaim the cache handle we lent out.
                self.cache_handle = cache_handle;
                return Poll::Ready(Err(err));
            }
        };

        if let Some(hook) = &self.after_send {
            if let Err(err) = (hook.after_send)(&mut candidate) {
                // The hook rejected the candidate: reclaim the cache handle from it.
                self.cache_handle = candidate.into_cache_handle();
                return Poll::Ready(Err(err));
            }
        }

        Poll::Ready(Ok(candidate))
    }

    /// Block until the leader fetch produces a cache candidate or a terminal error.
    ///
    /// Background revalidation relies on this narrower helper: it wants to apply the candidate
    /// in the background instead of streaming a response back to a caller.
    fn wait_for_candidate(&mut self) -> Result<CandidateResponse, SendErrorCause> {
        loop {
            if let Poll::Ready(outcome) = self.poll_candidate() {
                return outcome;
            }

            // Only the host pending request can move things forward from here.
            async_io::select(&[self.pending_req_handle.as_u32()])
                .map_err(|status| SendErrorCause::InternalError(Some(status)))?;
        }
    }
}

impl Drop for BackgroundRevalidation {
    /// Finish the background leader fetch, if one is still attached.
    ///
    /// This blocks until the fetch yields a candidate or fails. Background revalidation is
    /// deliberately best-effort: the caller already has the stale response, so any error from
    /// the fetch or from applying the candidate is ignored.
    fn drop(&mut self) {
        if let Some(mut leader) = self.pending.take() {
            if let Ok(candidate) = leader.wait_for_candidate() {
                let _ = candidate.apply_in_background();
            }
        }
    }
}