redirectionio 2.5.2

Redirection IO library to handle rule matching, redirects, and filtering of headers and bodies.
use crate::router::RouterConfig;
use http::uri::PathAndQuery;
use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use url::form_urlencoded::parse as parse_query;

/// Characters percent-encoded by `sanitize_url`: ASCII controls, space, `"`, `#`, `<` and `>`.
///
/// `%` is deliberately absent so escapes already present in the input are kept as they are.
const URL_ENCODE_SET: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');

/// Characters percent-encoded when a *decoded* query key or value is written back into a URL.
///
/// On top of `URL_ENCODE_SET` this escapes the characters that carry meaning in a query string,
/// so that the re-serialized query decodes to the same key/value pairs:
///
/// * `+` would otherwise be read back as a space,
/// * `%` would otherwise start a (possibly malformed or different) percent escape,
/// * `&` would otherwise split one parameter into two.
///
/// NOTE(review): `=` is left unescaped so values such as base64 padding keep their current
/// normalized form; a decoded *key* containing `=` is therefore still ambiguous once
/// re-serialized. Confirm whether that case needs handling.
const QUERY_ENCODE_SET: &AsciiSet = &URL_ENCODE_SET.add(b'+').add(b'%').add(b'&');

/// A request path-and-query string in the forms produced by the constructors of this type:
/// the normalized value, the value to match against, the query parameters that were set aside,
/// and the untouched input.
#[derive(Serialize, Deserialize, Debug, Clone, Hash)]
pub struct PathAndQueryWithSkipped {
    /// Path and query after processing. `from_config` stores the sanitized URL here, with the
    /// configured marketing query parameters removed when that option is enabled;
    /// `from_static` stores the input unchanged.
    pub path_and_query: String,
    /// Same content as `path_and_query`, lowercased by `from_config` when
    /// `ignore_path_and_query_case` is set. Both constructors in this file always fill it.
    pub path_and_query_matching: Option<String>,
    /// Marketing query parameters removed by `from_config`, joined with `&`. Only set when
    /// `pass_marketing_query_params_to_target` is enabled and at least one parameter was removed.
    pub skipped_query_params: Option<String>,
    /// The string exactly as it was handed to the constructor.
    pub original: String,
}

/// Returns `path_and_query_str` with every character of `URL_ENCODE_SET` percent-encoded.
///
/// The remaining characters are copied through unchanged.
pub fn sanitize_url(path_and_query_str: &str) -> String {
    let encoded_chunks = utf8_percent_encode(path_and_query_str, URL_ENCODE_SET);

    encoded_chunks.collect()
}

impl PathAndQueryWithSkipped {
    /// Wraps `path_and_query_str` verbatim: no sanitizing, no case folding and no query
    /// parameter filtering. Every string field holds a copy of the input and
    /// `skipped_query_params` is `None`.
    pub fn from_static(path_and_query_str: &str) -> Self {
        Self {
            path_and_query: path_and_query_str.to_string(),
            path_and_query_matching: Some(path_and_query_str.to_string()),
            original: path_and_query_str.to_string(),
            skipped_query_params: None,
        }
    }

    /// Builds the normalized forms of `path_and_query_str` according to `config`.
    ///
    /// The input is first passed through `sanitize_url`. Then:
    ///
    /// * when `config.ignore_marketing_query_params` is off, the sanitized URL is used as is;
    /// * otherwise the query string is decoded, the parameters listed in
    ///   `config.marketing_query_params` are taken out, and the remaining ones are
    ///   re-encoded and appended to the path. Parameters come out sorted by key, a repeated
    ///   key keeps only its last value, and a parameter with an empty value is written
    ///   without `=`.
    ///
    /// `path_and_query_matching` is lowercased when `config.ignore_path_and_query_case` is set.
    /// The removed parameters are returned in `skipped_query_params` only when
    /// `config.pass_marketing_query_params_to_target` is set and at least one was removed.
    ///
    /// If the sanitized URL cannot be parsed as a path-and-query, the error is logged and the
    /// sanitized URL is returned without any parameter filtering.
    pub fn from_config(config: &RouterConfig, path_and_query_str: &str) -> Self {
        let url = sanitize_url(path_and_query_str);

        if !config.ignore_marketing_query_params {
            return Self::without_skipped(config, url, path_and_query_str);
        }

        let parsed: PathAndQuery = match url.parse() {
            Ok(parsed) => parsed,
            Err(err) => {
                log::error!(
                    "cannot parse url '{}', cancel ignoring marketing query params: {}",
                    path_and_query_str,
                    err
                );

                return Self::without_skipped(config, url, path_and_query_str);
            }
        };

        let mut kept_params = String::new();
        let mut skipped_params = String::new();

        if let Some(query) = parsed.query() {
            // Collecting into a BTreeMap orders the parameters by key and lets a later
            // occurrence of a key replace an earlier one.
            let params: BTreeMap<_, _> = parse_query(query.as_bytes()).into_owned().collect();

            for (key, value) in &params {
                let target = if config.marketing_query_params.contains(key) {
                    &mut skipped_params
                } else {
                    &mut kept_params
                };

                Self::push_query_param(target, key, value);
            }
        }

        let mut new_path_and_query = parsed.path().to_string();

        if !kept_params.is_empty() {
            new_path_and_query.push('?');
            new_path_and_query.push_str(&kept_params);
        }

        let skipped_query_params = if config.pass_marketing_query_params_to_target && !skipped_params.is_empty() {
            Some(skipped_params)
        } else {
            None
        };

        Self {
            path_and_query_matching: Some(Self::matching_form(config, &new_path_and_query)),
            path_and_query: new_path_and_query,
            original: path_and_query_str.to_string(),
            skipped_query_params,
        }
    }

    /// Builds the result for an already sanitized `url` from which no query parameter was removed.
    fn without_skipped(config: &RouterConfig, url: String, original: &str) -> Self {
        Self {
            path_and_query_matching: Some(Self::matching_form(config, &url)),
            path_and_query: url,
            original: original.to_string(),
            skipped_query_params: None,
        }
    }

    /// Returns the form of `url` used for matching: lowercased when case is ignored, a plain copy otherwise.
    fn matching_form(config: &RouterConfig, url: &str) -> String {
        if config.ignore_path_and_query_case {
            url.to_lowercase()
        } else {
            url.to_string()
        }
    }

    /// Appends `key` (and `=value` when `value` is not empty) to `target`, percent-encoded with
    /// `QUERY_ENCODE_SET` and preceded by `&` when `target` already holds something.
    fn push_query_param(target: &mut String, key: &str, value: &str) {
        if !target.is_empty() {
            target.push('&');
        }

        // The encoder yields `&str` chunks, so they can be appended without a temporary String.
        target.extend(utf8_percent_encode(key, QUERY_ENCODE_SET));

        if !value.is_empty() {
            target.push('=');
            target.extend(utf8_percent_encode(value, QUERY_ENCODE_SET));
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::http::query::sanitize_url;
    use http::uri::PathAndQuery;

    /// Sanitizes `path` and checks that the result parses as a `PathAndQuery`.
    /// The parse result is printed to help diagnose a failure.
    fn test_url(path: &str) {
        let parse_result = sanitize_url(path).parse::<PathAndQuery>();

        println!("{:#?}", parse_result);

        assert!(parse_result.is_ok());
    }

    /// A raw `<` inside a query value must not prevent parsing.
    #[test]
    fn test_url_1() {
        let raw_url = "/npoplayer.html?tx_eonpo_npoplayer%5Bmid%5D=WO_EO_16582885&tx_eonpo_npoplayer%5Bhash%5D=45a69ca57ac8eee5025d45f06f9910f85fd9a0db814c590fg560293d<549880f&tx_eonpo_npoplayer%5Boverlay%5D=https%3A%2F%2Fblauwbloed.eo.nl%2Ffileadmin%2Fbestanden-2016%2Fuser_upload%2F2021-07%2FKoninklijk_gezin_fotosessie_zomer_2021.jpg&tx_eonpo_npoplayer%5Bhasadconsent%5D=0&tx_eonpo_npoplayer%5Breferralurl%5D=https%3A%2F%2Fblauwbloed.eo.nl%2Fartikel%2F2021%2F07%2Fkijk-de-eerste-foto-van-de-fotosessie-van-de-oranjes&tx_eonpo_npoplayer%5BsterSiteId%5D=blauwbloed&tx_eonpo_npoplayer%5BsterIdentifier%5D=blauwbloed-ios-smartphone&tx_eonpo_npoplayer%5BatinternetSiteId%5D=25&tx_eonpo_npoplayer%5BatinternetUserId%5D=287dbe14-d677-4b9b-8eeb-ecb389349db1&tx_eonpo_npoplayer%5BatinternetUserIdCookieDuration%5D=394";

        test_url(raw_url);
    }

    /// A raw `<` inside the path must not prevent parsing.
    #[test]
    fn test_url_2() {
        let raw_url = "/fileadmin/bestanden-2016/_processed_/5/5/csm_Echte_vriendschap_Vanaf_de_eerste_dag_van_hun_studie_zijn_Inge_en_Julia_vrjendinnep_2_8260<c0281.jtg";

        test_url(raw_url);
    }
}