spectrust_fastly_worker 0.1.0

SpecTrust library to integrate Spec Proxy with Fastly Compute@Edge
Documentation
use std::net::Ipv4Addr;

use cookie::{time::Duration, Cookie};
use fastly::{
    http::{
        header::{COOKIE, SET_COOKIE},
        HeaderValue,
    },
    Error as FastlyError, Request, Response,
};
use uuid::Uuid;

use crate::{Error, SpecConfiguration, SpecProxyMode};

/// Name of the request header that controls request forwarding for Spec Proxy.
///
/// In [`SpecProxyMode::Inline`] this header is set on the outgoing request to
/// `https://<original host>` right after the request host is rewritten.
const SPEC_HEADER_FORWARD_ORIGIN: &str = "x-spec-forward-origin";
/// Name (key) of the cookie that carries the Spec ID.
///
/// Incoming `Cookie` headers are searched for this name, and a cookie with this name is
/// appended to the response when a handled request did not already carry one.
const SPEC_COOKIE_ID: &str = "x-spec-id";
/// Name of the standard `X-Forwarded-For` header, from which the client IP address is read.
///
/// Note: declared locally because, at the time of writing, the author could not find a
/// predefined constant for it in the `http` crate.
const HEADER_X_FORWARDED_FOR: &str = "x-forwarded-for";

/// Transition object used to relay information between request and response.
///
/// A [`RequestMetadata`] is produced by [`spec_proxy_process_request`] and consumed by
/// [`spec_proxy_process_response`]. It records whether the request should be handled by
/// Spec Proxy, whether the Spec cookie was already present on the request, and which
/// Fastly backend the request has to be sent to.
///
/// [`spec_proxy_process`] wires the two halves together. Splitting the work into a request
/// part and a response part makes it possible to do additional processing on the request
/// before it is sent.
///
/// NOTE(review): this type and both halves are private to this module as written, so only
/// code inside this module can currently perform that split — confirm whether they are
/// meant to be exported.
#[derive(Debug, Default)]
struct RequestMetadata<'a> {
    /// Indicates whether or not the Spec cookie (`x-spec-id`) was present on the Request.
    has_spec_cookie: bool,
    /// Indicates whether or not Spec Proxy should process the Request or Response.
    /// Stays `false` when Spec Proxy is disabled or the client IP is filtered out.
    should_handle_request: bool,
    /// The Fastly backend that clients should use to serve the request. Borrowed from the
    /// [`SpecConfiguration`] that was used to process the request.
    fastly_backend: &'a str,
}

impl<'a> RequestMetadata<'a> {
    /// Returns the name of the Fastly backend this request should be sent to.
    ///
    /// The returned string carries the lifetime `'a` of the metadata's borrowed backend
    /// name rather than the lifetime of `&self`, so it remains usable after the metadata
    /// itself has been moved or dropped.
    pub fn fastly_backend(&self) -> &'a str {
        self.fastly_backend
    }
}

/// Entry point of the Spec Proxy library: process a request end to end.
///
/// Takes the incoming [`Request`] together with a [`SpecConfiguration`], prepares the
/// request for Spec Proxy, sends it to the selected Fastly backend and post-processes the
/// [`Response`]. The return value is a [`Result`] holding either the successful
/// [`Response`] or an error describing what went wrong.
///
/// *Note:* the Fastly Backends must be configured through the
/// [`SpecConfiguration::builder()`] constructor, which maps request hosts to their Fastly
/// counterparts. This mapping is what allows the Compute@Edge worker to make outgoing
/// requests.
///
/// Fastly Backends for Compute@Edge workers can be created either through the UI or with
/// the Fastly CLI. The Spec Proxy library has to be told what these backend values are in
/// order to operate properly.
///
/// [Fastly Backend Documentation](https://developer.fastly.com/reference/api/services/backend/)
///
/// # Example Usage
///
/// Create a project by following [Fastly's documentation](https://developer.fastly.com/learning/compute)
/// and select Rust when asked which language you would like to use. Then use the following
/// code to integrate the Spec Proxy library.
///
/// ```no_run
/// use fastly::{Error, Request, Response};
///
/// use spectrust_fastly_worker::{spec_proxy_process, SpecConfiguration, SpecProxyMode};
///
/// #[fastly::main]
/// fn main(request: Request) -> Result<Response, Error> {
///     let config = SpecConfiguration::builder([
///         (
///             "www.example.com",
///             "example_origin",
///         ),
///         (
///             "www.example.com.specprotected.com",
///             "example_spec_proxy_origin",
///         ),
///     ].into())
///     .with_operating_mode(SpecProxyMode::Inline)
///     .build();
///
///     spec_proxy_process(request, &config)
/// }
/// ```
pub fn spec_proxy_process(
    request: Request,
    config: &SpecConfiguration,
) -> Result<Response, FastlyError> {
    // When Spec Proxy is disabled, or the request is filtered out, the request comes back
    // untouched and is sent exactly as the edge worker normally would send it.
    let (outgoing, metadata) = spec_proxy_process_request(request, config)?;
    let response = outgoing.send(metadata.fastly_backend())?;
    Ok(spec_proxy_process_response(response, metadata))
}

/// [`Request`](fastly::Request) processing component of the Spec Proxy library.
///
/// Process the incoming Fastly [`Request`](fastly::Request) according to the [`SpecConfiguration`] object.
/// This function produces a [`Request`] and a [`RequestMetadata`] on successful execution.
/// The [`RequestMetadata`] object should be supplied to the [`spec_proxy_process_response`]
/// function which will pass along internal details. We'll return an error in the result of an
/// improper [`Url`](fastly::http::Url), a missing Fastly Backend for our hostname, and other similar
/// errors.
///
/// Note: this function will not result in an error when operating in [`SpecProxyMode::Listening`]
/// if the request to Spec Proxy fails. This is to keep the system robust against any errors and
/// keep user traffic flowing without interruption.
fn spec_proxy_process_request<'a>(
    mut request: Request,
    config: &'a SpecConfiguration,
) -> Result<(Request, RequestMetadata<'a>), Error> {
    let mut metadata = RequestMetadata {
        should_handle_request: false,
        has_spec_cookie: has_spec_cookie(request.get_header_all(COOKIE)),
        fastly_backend: request
            .get_url()
            .host_str()
            .ok_or(Error::MissingHost)
            .and_then(|host| config.backend_for_host(host))?,
    };
    if config.disable_spec_proxy() {
        return Ok((request, metadata));
    }

    if !should_handle_request(
        ip_from_x_forwarded_for(request.get_header(HEADER_X_FORWARDED_FOR)).ok(),
        config,
    ) {
        return Ok((request, metadata));
    }

    // mark that we should handle the request
    metadata.should_handle_request = true;

    // Note: cloned so we can also take a mutable reference to the Url
    let host = request
        .get_url()
        .host_str()
        .map(|h| h.to_string())
        .ok_or(Error::MissingHost)?;

    match config.operating_mode() {
        SpecProxyMode::Inline => {
            let url = request.get_url_mut();
            url.set_host(Some(format!("{}.specprotected.com", host).as_str()))
                .ok();

            request.set_header(SPEC_HEADER_FORWARD_ORIGIN, format!("https://{}", host));
        }
        SpecProxyMode::Listening => {
            // Note: we want to avoid cloning the body, but we need a feature request
            let mut proxy_request = request.clone_with_body();
            let url = proxy_request.get_url_mut();
            let newhost = format!("{}.specprotected.com", host);
            url.set_host(Some(newhost.as_str())).ok();
            // Always send the request, even if it's cached, and don't cache the response.
            // Something about Fastly is taking our request and using it as the cache for
            // the original request, even though they don't have the same url nor the same
            // Fastly Backend.
            proxy_request.set_pass(true);

            match config.backend_for_host(&newhost) {
                Ok(backend) => {
                    proxy_request.send_async(backend).ok();
                }
                _ => log::error!(
                    "Spec Proxy: could not find Fastly Backend for host: {}",
                    &newhost
                ),
            };
        }
    };

    // ensure we set the correct request backend
    metadata.fastly_backend = request
        .get_url()
        .host_str()
        .ok_or(Error::MissingHost)
        .and_then(|host| config.backend_for_host(host))?;

    Ok((request, metadata))
}

/// [`Response`] processing component of the Spec Proxy library.
///
/// Applies the information gathered in the [`RequestMetadata`] to the [`Response`]: when
/// the request was handled by Spec Proxy and carried no Spec cookie, a `Set-Cookie` header
/// with a freshly generated Spec ID is appended. The resulting [`Response`] can be returned
/// to the user from the Compute@Edge worker.
fn spec_proxy_process_response(mut response: Response, metadata: RequestMetadata) -> Response {
    let needs_spec_cookie = metadata.should_handle_request && !metadata.has_spec_cookie;
    if !needs_spec_cookie {
        return response;
    }

    // A random (v4) UUID serves as the Spec ID.
    let spec_id = Uuid::new_v4().to_string();
    let spec_cookie = Cookie::build(SPEC_COOKIE_ID, spec_id)
        .path("/")
        // 10 years
        .max_age(Duration::days(365 * 10))
        .finish();
    response.append_header(SET_COOKIE, spec_cookie.to_string());

    response
}

/// Determines whether or not Spec Proxy should process the request.
///
/// The decision is driven by the percentage of routed IP traffic configured on the
/// [`SpecConfiguration`]:
///
/// * `100` (or more): every request is handled, with or without a known IP.
/// * `0`: no request is handled.
/// * anything in between: the octets of the IP address are summed and reduced modulo 100,
///   and the request is handled when that bucket is strictly below the percentage. A
///   request without a known IP is not handled.
fn should_handle_request(ip: Option<Ipv4Addr>, config: &SpecConfiguration) -> bool {
    let threshold = config.percentage_of_ips();

    // The two extremes never depend on the IP address.
    if threshold >= 100 {
        return true;
    }
    if threshold == 0 {
        return false;
    }

    // Without an IP we don't assume anything about routing.
    ip.map_or(false, |address| {
        // Widen every octet to u16 so the sum cannot overflow (at most 255 * 4 = 1020).
        let octet_total: u16 = address.octets().iter().copied().map(u16::from).sum();
        // A value modulo 100 lies in [0, 99] and therefore always fits in a u8.
        let bucket = (octet_total % 100) as u8;

        // Strictly less than: "allow 1%" admits only bucket 0, which is one slice out of
        // the hundred in [0, 99].
        bucket < threshold
    })
}

/// Extract the client IPv4 address from an `X-Forwarded-For` [`HeaderValue`].
///
/// Only the first entry of the comma separated list is considered. Whitespace around that
/// entry is ignored.
///
/// Note: pass in the header after extracting it from a Fastly [`Request`] with
/// [`Request::get_header`]. Taking the header value rather than the whole request allows
/// for unit testing this function.
///
/// # Errors
///
/// Returns the address parse error, converted into [`Error`], when the header is missing,
/// is not valid visible ASCII text, or when its first entry is not an IPv4 address (this
/// includes IPv6 addresses).
///
/// [RFC 7239](https://datatracker.ietf.org/doc/html/rfc7239)
/// [MDN X-Forwarded-For](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For)
fn ip_from_x_forwarded_for(
    x_forwarded_for_header: Option<&HeaderValue>,
) -> Result<Ipv4Addr, Error> {
    x_forwarded_for_header
        .and_then(|header| header.to_str().ok())
        .and_then(|list| list.split(',').next())
        // List entries may be surrounded by optional whitespace, e.g. "1.2.3.4 , 5.6.7.8".
        .map(str::trim)
        // A missing header is parsed as the empty string, which yields a parse error.
        .unwrap_or("")
        .parse::<Ipv4Addr>()
        .map_err(Into::into)
}

/// Reports whether any of the given `Cookie` [`HeaderValue`]s contains the "x-spec-id"
/// cookie.
///
/// Header values that are not valid text and cookie pairs that cannot be parsed are
/// skipped.
///
/// Note: pass in all of the Cookie headers after extracting them from a Fastly [`Request`]
/// with [`Request::get_header_all`]. Taking the header values rather than the whole request
/// allows for unit testing this function.
fn has_spec_cookie<'a>(cookie_headers: impl Iterator<Item = &'a HeaderValue>) -> bool {
    for header in cookie_headers {
        if let Ok(raw_cookies) = header.to_str() {
            // A single Cookie header carries several `name=value` pairs separated by ';'.
            let found = raw_cookies
                .split(';')
                .filter_map(|pair| Cookie::parse(pair).ok())
                .any(|cookie| cookie.name() == SPEC_COOKIE_ID);
            if found {
                return true;
            }
        }
    }

    false
}

#[cfg(test)]
mod test {
    use super::*;

    /// IP based filtering: (client ip, configured percentage, expected decision).
    #[test]
    fn should_handle_request_ips() {
        [
            ("0.0.0.0", 100, true),
            ("0.0.0.40", 100, true),
            ("0.0.0.99", 100, true),
            ("0.0.0.100", 100, true),
            ("0.0.0.40", 50, true),
            ("0.0.0.49", 50, true),
            ("0.0.0.50", 50, false),
            ("0.0.0.60", 50, false),
            ("0.0.0.0", 0, false),
            ("0.0.0.40", 0, false),
            ("0.0.0.99", 0, false),
            ("0.0.0.100", 0, false),
            // some legit-looking IPs that don't exploit our algorithm for testing
            ("24.68.195.11", 50, false),
            ("74.232.255.255", 50, true),
            ("89.2.79.2", 50, false),
            ("67.67.67.67", 50, false),
            ("10.0.0.8", 50, true),
        ]
        .into_iter()
        .for_each(|(ip, percentage_of_ips, expected)| {
            let parsed_ip: Option<Ipv4Addr> = ip.parse().ok();
            let config = SpecConfiguration::builder(Default::default())
                .with_percentage_of_ips(percentage_of_ips)
                .build();

            assert_eq!(
                should_handle_request(parsed_ip, &config),
                expected,
                "ip: {}, percentage of ips: {}",
                ip,
                percentage_of_ips
            );
        });
    }

    /// Without a client IP only a 100% configuration lets the request through.
    #[test]
    fn should_handle_request_without_ip() {
        [(100, true), (50, false), (0, false)]
            .into_iter()
            .for_each(|(percentage_of_ips, expected)| {
                let config = SpecConfiguration::builder(Default::default())
                    .with_percentage_of_ips(percentage_of_ips)
                    .build();

                assert_eq!(
                    should_handle_request(None, &config),
                    expected,
                    "percentage of ips: {}",
                    percentage_of_ips
                );
            });
    }

    #[test]
    fn extract_ip_from_x_forwarded_for() {
        [
            ("24.68.195.11", Ipv4Addr::new(24, 68, 195, 11)),
            ("74.232.255.255", Ipv4Addr::new(74, 232, 255, 255)),
            ("89.2.79.2", Ipv4Addr::new(89, 2, 79, 2)),
            ("67.67.67.67", Ipv4Addr::new(67, 67, 67, 67)),
            // and if proxy IPs are involved, though people rarely do this anymore
            (
                "24.68.195.11, 127.0.0.1, 10.9.23.11",
                Ipv4Addr::new(24, 68, 195, 11),
            ),
            (
                "74.232.255.255, 127.0.0.1, 10.9.23.11",
                Ipv4Addr::new(74, 232, 255, 255),
            ),
        ]
        .into_iter()
        .for_each(|(raw, expected)| {
            let value: Option<HeaderValue> = raw.try_into().ok();
            assert_eq!(
                ip_from_x_forwarded_for(value.as_ref()).unwrap(),
                expected,
                "header value: {:?}",
                raw
            );
        });
    }

    /// A missing header or a first entry that is not an IPv4 address is an error.
    #[test]
    fn reject_unparseable_x_forwarded_for() {
        assert!(ip_from_x_forwarded_for(None).is_err());

        ["", "not-an-ip", "256.0.0.1", "2001:db8::1"]
            .into_iter()
            .for_each(|raw| {
                let value = HeaderValue::from_static(raw);
                assert!(
                    ip_from_x_forwarded_for(Some(&value)).is_err(),
                    "header value: {:?}",
                    raw
                );
            });
    }

    #[test]
    fn examine_has_spec_cookie() {
        [
            (vec!["a=bcd; two=lsdkfjsldkfj; another_one=testing"], false),
            (vec![""], false),
            (vec![], false),
            (
                vec!["a=bcd; two=lsdkfjsldkfj; x-spec-id=something; another_one=testing"],
                true,
            ),
            (vec!["x-spec-id=something"], true),
            (
                vec![
                    "a=bcd; two=lsdkfjsldkfj; another_one=testing",
                    "more=cookies; for=you",
                ],
                false,
            ),
            (
                vec![
                    "a=bcd; two=lsdkfjsldkfj; x-spec-id=something; another_one=testing",
                    "more=cookies; for=you",
                ],
                true,
            ),
            (
                vec![
                    "a=bcd; two=lsdkfjsldkfj; another_one=testing",
                    "more=cookies; x-spec-id=something; for=you",
                ],
                true,
            ),
        ]
        .into_iter()
        .for_each(|(cookie, expected)| {
            // Rendered up front because the raw values are consumed below.
            let description = format!("{:?}", cookie);
            let headers = cookie
                .into_iter()
                .map(|v| v.try_into().unwrap())
                .collect::<Vec<_>>();
            assert_eq!(
                has_spec_cookie(headers.iter()),
                expected,
                "cookie headers: {}",
                description
            );
        });
    }
}