use std::str::FromStr;
use bytes::Bytes;
use data_collection::{process_from_html, process_from_json};
use http::header::{CACHE_CONTROL, ETAG, EXPIRES, LAST_MODIFIED};
use http::response::Parts;
use http::{HeaderName, HeaderValue};
use crate::config;
use crate::proxy::context::incoming::RequestHandle;
use crate::proxy::set_edgee_header;
use crate::tools::{
self,
edgee_cookie::{self},
};
use html::{parse_html, Document};
pub mod data_collection;
pub mod html;
/// Parses an HTML response body and, when allowed, attaches data-collection
/// events to the resulting [`Document`].
///
/// # Errors
///
/// * `"compute-aborted(no-sdk)"` when `body` does not contain the literal
///   `id="__EDGEE_SDK__"` marker.
/// * `"compute-aborted(commented-sdk)"` when the marker is present in the raw
///   text but the parsed document ends up with an empty `sdk_full_tag`
///   (the unit tests exercise this with an SDK tag inside an HTML comment).
///
/// Every later abort condition is *not* an error: the reason is written to
/// the response through `set_edgee_header` and the parsed document is still
/// returned, just without data-collection events.
///
/// # Side effects
///
/// When `compute.enforce_no_store_policy` is enabled, `Cache-Control` is
/// overwritten with `private, no-store` and the `Expires`, `ETag` and
/// `Last-Modified` headers are removed from `response`.
pub async fn html_handler(
    body: &str,
    request: &RequestHandle,
    response: &mut Parts,
) -> Result<Document, &'static str> {
    // Cheap substring probe before paying for a full parse.
    if !body.contains(r#"id="__EDGEE_SDK__""#) {
        return Err("compute-aborted(no-sdk)");
    }

    let mut document = parse_html(body, request.get_host().as_str());
    if document.sdk_full_tag.is_empty() {
        return Err("compute-aborted(commented-sdk)");
    }

    if config::get().compute.enforce_no_store_policy {
        let headers = &mut response.headers;
        headers.insert(
            HeaderName::from_str(CACHE_CONTROL.as_ref()).unwrap(),
            HeaderValue::from_str("private, no-store").unwrap(),
        );
        for stale_header in [EXPIRES, ETAG, LAST_MODIFIED] {
            headers.remove(stale_header);
        }
    }

    // From here on the document is always returned; an abort only leaves a
    // trace in the response header.
    if let Err(reason) = do_process_payload(request, response) {
        set_edgee_header(response, reason);
        return Ok(document);
    }

    if !edgee_cookie::has_cookie(request) {
        set_edgee_header(response, "compute-aborted(no-cookie)");
        return Ok(document);
    }

    if let Some(events) = process_from_html(&document, request, response).await {
        document.data_collection_events = events;
    }
    Ok(document)
}
/// Handles a JSON payload by delegating to `process_from_json`.
///
/// All four arguments are forwarded unchanged and the callee's
/// `Option<String>` is returned as-is; this function adds no logic of its
/// own.
// NOTE(review): the meaning of the returned string and of `None` is defined
// by `data_collection::process_from_json` — confirm there.
pub async fn json_handler(
    body: &Bytes,
    request: &RequestHandle,
    response: &mut Parts,
    from_third_party_sdk: bool,
) -> Option<String> {
    process_from_json(body, request, response, from_third_party_sdk).await
}
/// Decides whether data collection may run for this request/response pair.
///
/// Returns `Ok(true)` when no abort condition matches (the function never
/// returns `Ok(false)`).
///
/// # Errors
///
/// Returns the abort reason, checked in this order:
///
/// * `"compute-aborted(disableEdgeDataCollection)"` — the query string
///   contains `disableEdgeDataCollection`.
/// * `"compute-aborted(cacheable)"` — the no-store policy is not enforced and
///   `tools::cacheable::check_cacheability` reports the response as cacheable.
/// * `"compute-aborted(prefetch)"` — the `purpose` or `sec-purpose` request
///   header contains `prefetch`.
/// * `"compute-aborted(fetch-dest)"` — the `sec-fetch-dest` request header is
///   one of the listed sub-resource destinations.
fn do_process_payload(request: &RequestHandle, response: &mut Parts) -> Result<bool, &'static str> {
    if request
        .get_query()
        .as_str()
        .contains("disableEdgeDataCollection")
    {
        return Err("compute-aborted(disableEdgeDataCollection)");
    }

    if !config::get().compute.enforce_no_store_policy {
        // Header values come from the upstream server and may legally contain
        // bytes outside visible ASCII, for which `HeaderValue::to_str` returns
        // an error. Convert lossily instead of unwrapping so that such a
        // response cannot panic the handler; the header stays present in the
        // map with any undecodable bytes replaced.
        // When a header name occurs several times, the value inserted last
        // into the map wins.
        let res_headers = response
            .headers
            .iter()
            .map(|(name, value)| {
                (
                    name.as_str().to_string(),
                    String::from_utf8_lossy(value.as_bytes()).into_owned(),
                )
            })
            .collect::<std::collections::HashMap<String, String>>();

        if tools::cacheable::check_cacheability(
            &res_headers,
            config::get().compute.behind_proxy_cache,
        ) {
            return Err("compute-aborted(cacheable)");
        }
    }

    // A missing header is treated as an empty string, which matches nothing.
    let purpose = request.get_header("purpose").unwrap_or_default();
    let sec_purpose = request.get_header("sec-purpose").unwrap_or_default();
    if purpose.contains("prefetch") || sec_purpose.contains("prefetch") {
        return Err("compute-aborted(prefetch)");
    }

    let fetch_dest = request.get_header("sec-fetch-dest").unwrap_or_default();
    if matches!(
        fetch_dest.as_str(),
        "audio" | "font" | "image" | "manifest" | "script" | "style" | "video" | "empty"
    ) {
        return Err("compute-aborted(fetch-dest)");
    }

    Ok(true)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::init_test_config;
    use core::panic;
    use http::header::{HeaderMap, COOKIE};
    use pretty_assertions::assert_eq;

    /// HTML page with a title, canonical link, keywords, a data layer holding
    /// three events, and a live SDK script tag.
    fn sample_html_full_minimal() -> String {
        String::from(
            "<html>
<head>
<title>ABC > DEF</title>
<!-- LEGACY STUFF HERE -->
<link rel=\"canonical\" href=\"https://test.com/test\"/>
<meta name=\"keywords\" content=\"k1, k2, k3\"/>
<script type=\"json\" id=\"__EDGEE_DATA_LAYER__\">{
\"data_collection\": {
\"events\": [
{
\"type\": \"track\",
\"data\": {\"name\": \"Event > name\"}
},
{
\"type\": \"page\",
\"data\": {\"name\": \"Page name\", \"title\": \"Page title\"}
},
{
\"type\": \"user\",
\"data\": {\"user_id\": \"123\", \"anonymous_id\": \"456\"}
}
]
}
}</script>
<script type=\"javascript\" id=\"__EDGEE_SDK__\" src=\"/_edgee/sdk.js\"/>
</head>
<body></body>
</html>",
        )
    }

    /// HTML page whose only SDK script tag sits inside an HTML comment.
    fn sample_html_commented_sdk() -> String {
        String::from(
            "<html>
<head>
<title>ABC > DEF</title>
<!-- <script type=\"javascript\" id=\"__EDGEE_SDK__\" src=\"/_edgee/sdk.js\"/> -->
</head>
<body></body>
</html>",
        )
    }

    /// Response head with status 200 and no headers.
    fn empty_parts() -> Parts {
        http::response::Builder::new()
            .status(200)
            .body("")
            .unwrap()
            .into_parts()
            .0
    }

    /// Runs `html_handler` against a fresh, empty response head.
    async fn run_html_handler(
        body: &str,
        request: &RequestHandle,
    ) -> Result<Document, &'static str> {
        let mut response = empty_parts();
        html_handler(body, request, &mut response).await
    }

    #[tokio::test]
    async fn html_handler_with_sample_body() {
        init_test_config();
        let html = sample_html_full_minimal();
        let request = RequestHandle::default();

        let document = run_html_handler(&html, &request)
            .await
            .unwrap_or_else(|reason| panic!("Error: {}", reason));

        assert_eq!(document.title, "ABC > DEF");
        assert_eq!(document.canonical, "https://test.com/test");
        assert_eq!(document.keywords, "k1, k2, k3");
    }

    #[tokio::test]
    async fn html_handler_without_sdk() {
        init_test_config();
        let html = "X".repeat(1000);
        let request = RequestHandle::default();

        match run_html_handler(&html, &request).await {
            Err(reason) => assert_eq!(reason, "compute-aborted(no-sdk)"),
            Ok(_) => panic!("Should have failed"),
        }
    }

    #[tokio::test]
    async fn html_handler_with_commented_sdk() {
        init_test_config();
        let html = sample_html_commented_sdk();
        let request = RequestHandle::default();

        match run_html_handler(&html, &request).await {
            Err(reason) => assert_eq!(reason, "compute-aborted(commented-sdk)"),
            Ok(_) => panic!("Should have failed"),
        }
    }

    #[tokio::test]
    async fn html_handler_with_request_cookie() {
        init_test_config();
        let html = sample_html_full_minimal();
        let mut request_headers = HeaderMap::new();
        request_headers.insert(COOKIE, HeaderValue::from_static("edgee=abc"));
        let request = RequestHandle::default_with_headers(request_headers);

        let document = run_html_handler(&html, &request)
            .await
            .unwrap_or_else(|reason| panic!("Error: {}", reason));

        assert_eq!(document.title, "ABC > DEF");
        // With the cookie present, events from the data layer are attached.
        assert!(document.data_collection_events.contains("Event > name"));
    }
}