use std::sync::OnceLock;
use regex::Regex;
use super::token::TrackingTokenSigner;
use super::types::{LinkId, LinkMapping, MsgId};
/// Returns the lazily-compiled, process-wide regex that locates `<a … href=…>` opening tags.
///
/// The pattern is case-insensitive and exposes four capture groups:
///
/// 1. everything from `<a` up to and including `href=` (and surrounding whitespace),
/// 2. the attribute value when it is double-quoted,
/// 3. the attribute value when it is single-quoted,
/// 4. the remainder of the tag up to and including the closing `>`.
///
/// Unquoted `href` values are not matched.
///
/// # Panics
///
/// Panics on first use if the pattern fails to compile, which would be a programming error.
fn anchor_regex() -> &'static Regex {
    static RE: OnceLock<Regex> = OnceLock::new();
    RE.get_or_init(|| {
        // `\shref` (not `\bhref`): a word boundary also sits between `-` and `h`, so
        // `\bhref` would latch onto `data-href="…"` and rewrite the wrong attribute.
        // Requiring whitespace in front pins the match to a real `href` attribute.
        Regex::new(r#"(?i)(<a\b[^>]*?\shref\s*=\s*)(?:"([^"]*)"|'([^']*)')([^>]*>)"#)
            .expect("anchor regex compiles")
    })
}
/// Inserts a 1×1 tracking `<img>` pointing at `pixel_url` into `html`.
///
/// The image is placed immediately before the last `</body>` tag (matched
/// case-insensitively). When the document has no closing body tag the image is
/// appended at the very end instead. `pixel_url` is HTML-attribute-escaped
/// before being embedded in the `src` attribute.
pub fn inject_pixel(html: &str, pixel_url: &str) -> String {
    let pixel = format!(
        r#"<img src="{}" width="1" height="1" alt="" style="display:block;border:0;" />"#,
        html_attr_escape(pixel_url),
    );
    // Falling back to `html.len()` makes "no </body>" the same code path as
    // "insert at this offset": the tail is simply empty.
    let split_at = ci_rfind(html, "</body>").unwrap_or(html.len());
    let (before, after) = html.split_at(split_at);
    [before, pixel.as_str(), after].concat()
}
/// Result of [`rewrite_links`]: the rewritten markup together with the table of
/// links that were replaced.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RewriteOutcome {
/// The HTML after every trackable anchor `href` has been swapped for a redirector URL.
pub html: String,
/// One entry per rewritten anchor, in the order the anchors were rewritten,
/// pairing the generated link id with the `href` value it replaced.
pub mappings: Vec<LinkMapping>,
}
pub fn rewrite_links(
html: &str,
base_url: &str,
tenant_id: &str,
msg_id: &MsgId,
signer: &TrackingTokenSigner,
) -> RewriteOutcome {
let re = anchor_regex();
let mut mappings: Vec<LinkMapping> = Vec::new();
let mut next_id: u32 = 0;
let trimmed_base = base_url.trim_end_matches('/').to_string();
let rewritten = re.replace_all(html, |caps: ®ex::Captures<'_>| {
let prefix = caps.get(1).map_or("", |m| m.as_str());
let original = caps
.get(2)
.or_else(|| caps.get(3))
.map_or("", |m| m.as_str());
let suffix = caps.get(4).map_or("", |m| m.as_str());
if !is_trackable_href(original, &trimmed_base, tenant_id) {
return caps
.get(0)
.map_or(String::new(), |m| m.as_str().to_string());
}
let link_id = LinkId(format!("L{next_id}"));
next_id += 1;
let token = signer.sign_click(tenant_id, msg_id, &link_id);
let redir = format!(
"{base}/t/c/{tenant}/{msg}/{lid}?tag={tag}",
base = trimmed_base,
tenant = url_path_escape(tenant_id),
msg = url_path_escape(msg_id.as_str()),
lid = url_path_escape(link_id.as_str()),
tag = url_query_escape(token.as_str()),
);
mappings.push(LinkMapping {
link_id,
original_url: original.to_string(),
});
format!(r#"{prefix}"{redir}"{suffix}"#)
});
RewriteOutcome {
html: rewritten.into_owned(),
mappings,
}
}
fn is_trackable_href(href: &str, base_url: &str, tenant_id: &str) -> bool {
if href.is_empty() {
return false;
}
if href.starts_with('#') {
return false;
}
let lower = href.to_ascii_lowercase();
if !lower.starts_with("http://") && !lower.starts_with("https://") {
return false;
}
let prefix = format!("{}/t/c/{}/", base_url, tenant_id);
if href.starts_with(&prefix) {
return false;
}
true
}
/// Escapes `s` for safe inclusion inside a double-quoted HTML attribute value.
///
/// `&`, `"`, `<` and `>` are replaced by their named character references;
/// every other character is copied through unchanged. Single quotes are left
/// alone, so the result must only be embedded in double-quoted attributes.
fn html_attr_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // One pass over the input, so an `&` introduced by an earlier replacement
    // can never be escaped a second time.
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '"' => out.push_str("&quot;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            other => out.push(other),
        }
    }
    out
}
/// Percent-encodes `s` so it can be used as a single URL path segment.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` (the RFC 3986 "unreserved"
/// set) are copied through; every other byte of the UTF-8 encoding is emitted
/// as `%XX` with upper-case hexadecimal digits.
fn url_path_escape(s: &str) -> String {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let mut escaped = String::with_capacity(s.len());
    for byte in s.bytes() {
        let unreserved =
            byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~');
        if unreserved {
            escaped.push(char::from(byte));
        } else {
            // High nibble first, then low nibble.
            escaped.push('%');
            escaped.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
            escaped.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
        }
    }
    escaped
}
/// Percent-encodes `s` for use as a query-string value.
///
/// Delegates to `url_path_escape`, so the same rule applies: only ASCII
/// letters, digits and `-`, `_`, `.`, `~` are left unencoded.
fn url_query_escape(s: &str) -> String {
url_path_escape(s)
}
/// Finds the last occurrence of `needle` in `haystack`, ignoring ASCII case.
///
/// Returns the byte offset at which the match starts, or `None` when there is
/// no match. Candidate offsets are taken from the character boundaries of
/// `haystack`, scanned from the end.
fn ci_rfind(haystack: &str, needle: &str) -> Option<usize> {
    let width = needle.len();
    for (start, _) in haystack.char_indices().rev() {
        // `get` yields `None` when the window would run past the end of the
        // haystack or stop in the middle of a multi-byte character.
        let Some(window) = haystack.get(start..start + width) else {
            continue;
        };
        if window.eq_ignore_ascii_case(needle) {
            return Some(start);
        }
    }
    None
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Signer built from a fixed all-zero 32-byte key.
    fn signer() -> TrackingTokenSigner {
        TrackingTokenSigner::new(vec![0u8; 32]).unwrap()
    }

    /// Runs `rewrite_links` with the fixture most tests share:
    /// base `https://t.example`, tenant `acme`, message `m1`.
    fn rewrite(html: &str) -> RewriteOutcome {
        rewrite_links(
            html,
            "https://t.example",
            "acme",
            &MsgId::new("m1"),
            &signer(),
        )
    }

    #[test]
    fn inject_pixel_before_body_close() {
        let html = "<html><body>hi</body></html>";
        let out = inject_pixel(html, "https://t.example/x");
        assert!(out.contains("<img"));
        assert!(out.contains("https://t.example/x"));
        let img_idx = out.find("<img").unwrap();
        let body_idx = out.find("</body>").unwrap();
        assert!(img_idx < body_idx);
    }

    #[test]
    fn inject_pixel_appends_when_no_body_tag() {
        let html = "<p>hello</p>";
        let out = inject_pixel(html, "https://t.example/x");
        assert!(out.starts_with("<p>hello</p>"));
        assert!(out.ends_with("/>"));
    }

    #[test]
    fn inject_pixel_handles_mixed_case_body() {
        let html = "<HTML><BODY>x</BODY></HTML>";
        let out = inject_pixel(html, "https://t.example/x");
        let img_idx = out.find("<img").unwrap();
        let body_idx = out.to_ascii_lowercase().find("</body>").unwrap();
        assert!(img_idx < body_idx);
    }

    #[test]
    fn inject_pixel_html_escapes_url() {
        let html = "<body>x</body>";
        let out = inject_pixel(html, "https://t.example/x?a=1&b=2");
        // The ampersand must appear as a character reference, never raw.
        assert!(out.contains("&amp;b=2"));
        assert!(!out.contains("?a=1&b=2"));
    }

    #[test]
    fn rewrite_links_swaps_http_anchors() {
        let r = rewrite(r#"<a href="https://acme.com/pricing">price</a>"#);
        assert_eq!(r.mappings.len(), 1);
        assert_eq!(r.mappings[0].original_url, "https://acme.com/pricing");
        assert!(r.html.contains("/t/c/acme/m1/L0?tag="));
        assert!(!r.html.contains("https://acme.com/pricing"));
    }

    #[test]
    fn rewrite_links_assigns_sequential_ids() {
        let r = rewrite(r#"<a href="https://a.com/">a</a> <a href="https://b.com/">b</a>"#);
        assert_eq!(r.mappings.len(), 2);
        assert_eq!(r.mappings[0].link_id.as_str(), "L0");
        assert_eq!(r.mappings[1].link_id.as_str(), "L1");
    }

    #[test]
    fn rewrite_links_skips_internal_anchors() {
        let html = r##"<a href="#section">jump</a>"##;
        let r = rewrite(html);
        assert_eq!(r.mappings.len(), 0);
        assert_eq!(r.html, html);
    }

    #[test]
    fn rewrite_links_skips_mailto_tel_js() {
        for href in ["mailto:x@y", "tel:+1", "javascript:void(0)", "data:text"] {
            let html = format!(r#"<a href="{href}">x</a>"#);
            let r = rewrite(&html);
            assert_eq!(r.mappings.len(), 0, "should skip {href}");
            assert_eq!(r.html, html, "should not modify {href}");
        }
    }

    #[test]
    fn rewrite_links_idempotent_on_already_redirector() {
        let r = rewrite(r#"<a href="https://t.example/t/c/acme/m1/L0?tag=abc">x</a>"#);
        assert_eq!(r.mappings.len(), 0);
    }

    #[test]
    fn rewrite_links_handles_single_quoted_href() {
        let r = rewrite(r#"<a href='https://acme.com/'>x</a>"#);
        assert_eq!(r.mappings.len(), 1);
    }

    #[test]
    fn rewrite_links_case_insensitive_tag() {
        let r = rewrite(r#"<A HREF="https://acme.com/">x</A>"#);
        assert_eq!(r.mappings.len(), 1);
    }

    #[test]
    fn rewrite_links_signed_token_is_url_safe() {
        let r = rewrite(r#"<a href="https://acme.com/">x</a>"#);
        let url = &r.html;
        assert!(url.contains("?tag="));
        // Isolate the token: it runs from just after `?tag=` to the closing quote.
        let tag_pos = url.find("?tag=").unwrap() + "?tag=".len();
        let tail = &url[tag_pos..];
        let tag_end = tail.find(['"', '<']).unwrap();
        let tag = &tail[..tag_end];
        assert_eq!(tag.len(), 22);
        assert!(!tag.contains('+'));
        assert!(!tag.contains('/'));
        assert!(!tag.contains('='));
    }

    #[test]
    fn rewrite_links_path_escapes_tenant_and_msg() {
        let html = r#"<a href="https://acme.com/">x</a>"#;
        let r = rewrite_links(
            html,
            "https://t.example",
            "acme corp",
            &MsgId::new("msg/1"),
            &signer(),
        );
        assert!(r.html.contains("/t/c/acme%20corp/msg%2F1/L0"));
    }

    #[test]
    fn rewrite_links_preserves_anchor_attributes() {
        let r = rewrite(r#"<a href="https://acme.com/" target="_blank" rel="noopener">go</a>"#);
        assert!(r.html.contains("target=\"_blank\""));
        assert!(r.html.contains("rel=\"noopener\""));
    }

    #[test]
    fn rewrite_links_strips_trailing_slash_from_base_url() {
        let html = r#"<a href="https://acme.com/">x</a>"#;
        let r = rewrite_links(
            html,
            "https://t.example/",
            "acme",
            &MsgId::new("m1"),
            &signer(),
        );
        assert!(!r.html.contains("https://t.example//t/c/"));
        assert!(r.html.contains("https://t.example/t/c/"));
    }
}