use std::collections::BTreeMap;
use std::sync::OnceLock;
use regex_lite::Regex;
use crate::values::correlation::{
extract_cb_id_values, extract_html_lid_values, extract_lid_values_unanchored,
extract_plaintext_lid_values, normalize_url, slug_for_lid, CbIdCorrelation, LidCorrelation,
};
use crate::values::placeholder::{
extract_placeholders, find_suspicious_placeholders, PlaceholderType, ResolutionError, TOKEN,
};
use crate::values::templatize::FieldKind;
/// Outcome of preparing a single template field with `prepare_field`.
#[derive(Debug, Clone)]
pub struct PreparedTemplate {
/// Field text after every placeholder that could be resolved has been substituted.
pub body: String,
/// Problems recorded while preparing the field (retired tokens, placeholders in an
/// unknown context, lid / cb_id values that could not be resolved).
pub errors: Vec<ResolutionError>,
/// Human-readable notes collected while correlating placeholders.
pub warnings: Vec<String>,
}
/// Resolves every placeholder token in `template` for one field and returns the resulting body
/// together with the errors and warnings gathered along the way.
///
/// * `template` – local text that may contain [`TOKEN`] placeholders.
/// * `remote` – the remote copy of the same field, if there is one. With `Some`, lid and cb_id
///   values are correlated against it. With `None`, cb_id filters are stripped from the body
///   first and lid values are produced by `fallback_lid_batch`.
/// * `field` – the kind of field being prepared; forwarded to the lid helpers.
///
/// Placeholders that cannot be resolved stay untouched in the returned body and are reported
/// through `errors`.
pub fn prepare_field(template: &str, remote: Option<&str>, field: FieldKind) -> PreparedTemplate {
    // Suspicious tokens are reported even when the template holds no live placeholder.
    let mut errors: Vec<ResolutionError> = find_suspicious_placeholders(template)
        .into_iter()
        .map(|token| ResolutionError::RetiredNamespace { token })
        .collect();

    if !template.contains(TOKEN) {
        return PreparedTemplate {
            body: template.to_string(),
            errors,
            warnings: Vec::new(),
        };
    }

    let (body, mut warnings) = if remote.is_some() {
        (template.to_string(), Vec::new())
    } else {
        strip_cb_id_filters(template)
    };

    let placeholders = extract_placeholders(&body);
    let lid_indices: Vec<usize> = placeholders
        .iter()
        .enumerate()
        .filter_map(|(idx, ph)| (ph.ty == Some(PlaceholderType::Lid)).then_some(idx))
        .collect();

    // Lid correlation runs before cb_id correlation so warnings keep that order.
    let (lid_values, cb_id_resolved) = match remote {
        Some(remote_body) => {
            let lids = resolve_lid_batch(
                &body,
                &placeholders,
                &lid_indices,
                remote_body,
                field,
                &mut warnings,
            );
            let cb_ids = resolve_cb_id_batch(&body, &placeholders, remote_body, &mut warnings);
            (lids, cb_ids)
        }
        None => (
            fallback_lid_batch(&body, &placeholders, &lid_indices, field),
            BTreeMap::new(),
        ),
    };

    // Walk the placeholders in document order; lid values are consumed in that same order.
    let mut pending_lids = lid_values.into_iter();
    let mut substitutions: Vec<(usize, usize, String)> = Vec::new();
    for ph in &placeholders {
        let value = match ph.ty {
            None => {
                errors.push(ResolutionError::UnknownContext { start: ph.start });
                None
            }
            Some(PlaceholderType::Lid) => {
                let lid = pending_lids.next().flatten();
                if lid.is_none() {
                    errors.push(ResolutionError::UnresolvedLid {
                        start: ph.start,
                        anchor: lid_anchor_for(&body, ph.start, field),
                    });
                }
                lid
            }
            Some(PlaceholderType::CbId) => {
                let cb_id = cb_id_resolved.get(&ph.start).cloned().flatten();
                if cb_id.is_none() {
                    errors.push(ResolutionError::UnresolvedCbId {
                        start: ph.start,
                        name: cb_id_name_at(&body, ph.start),
                    });
                }
                cb_id
            }
        };
        if let Some(text) = value {
            substitutions.push((ph.start, ph.end, text));
        }
    }

    // Substitute back-to-front so earlier byte offsets stay valid while the text changes length.
    let mut rendered = body;
    for (start, end, text) in substitutions.into_iter().rev() {
        rendered.replace_range(start..end, &text);
    }

    PreparedTemplate {
        body: rendered,
        errors,
        warnings,
    }
}
/// Resolves the lid placeholders listed in `lid_indices` against `remote`.
///
/// Returns one entry per index, in the same order; `None` marks a placeholder for which no
/// remote value was found. Fields that support neither an HTML nor a plaintext anchor are
/// delegated to `resolve_lid_positional`. Otherwise each placeholder is keyed by the URL
/// returned from `lid_anchor_for`, and remote values sharing that URL are handed out
/// first-in-first-out. Diagnostics are appended to `warnings`.
fn resolve_lid_batch(
    body: &str,
    placeholders: &[crate::values::placeholder::Placeholder],
    lid_indices: &[usize],
    remote: &str,
    field: FieldKind,
    warnings: &mut Vec<String>,
) -> Vec<Option<String>> {
    if lid_indices.is_empty() {
        return Vec::new();
    }

    let html = field.supports_html_anchor();
    if !(html || field.supports_plaintext_anchor()) {
        return resolve_lid_positional(placeholders, lid_indices, remote, field, warnings);
    }

    let remote_pairs: Vec<LidCorrelation> = if html {
        extract_html_lid_values(remote)
    } else {
        extract_plaintext_lid_values(remote)
    };

    // One pass over the template side: remember each anchor and count placeholders per URL.
    let mut anchors: Vec<Option<String>> = Vec::with_capacity(lid_indices.len());
    let mut tmpl_per_url: BTreeMap<String, usize> = BTreeMap::new();
    for &idx in lid_indices {
        let anchor = lid_anchor_for(body, placeholders[idx].start, field);
        if let Some(anchor_url) = &anchor {
            *tmpl_per_url.entry(anchor_url.clone()).or_default() += 1;
        }
        anchors.push(anchor);
    }

    // Remote side: a FIFO queue of values for every URL.
    let mut queues: BTreeMap<String, std::collections::VecDeque<&LidCorrelation>> =
        BTreeMap::new();
    for pair in &remote_pairs {
        queues.entry(pair.url.clone()).or_default().push_back(pair);
    }

    for (url, queue) in &queues {
        let remote_count = queue.len();
        let tmpl_count = tmpl_per_url.get(url).copied().unwrap_or(0);
        let counts_disagree = tmpl_count > 0 && remote_count != tmpl_count;
        if remote_count > 1 || counts_disagree {
            warnings.push(format!(
                "URL '{url}' has {} remote lid occurrences and {tmpl_count} \
                 template placeholders — using positional FIFO match. \
                 If links were reordered in Braze, lid values may be assigned \
                 to the wrong placeholder.",
                remote_count
            ));
        }
    }

    anchors
        .into_iter()
        .map(|anchor| {
            let Some(url) = anchor else {
                warnings.push(
                    "lid placeholder has no URL anchor in template — \
                     anchor-less correlation is not supported; resolve will fail"
                        .to_string(),
                );
                return None;
            };
            let picked = queues.get_mut(&url).and_then(|queue| queue.pop_front());
            if picked.is_none() {
                warnings.push(format!("lid: URL anchor '{url}' not found in remote body"));
            }
            picked.map(|pair| pair.value.clone())
        })
        .collect()
}
/// Assigns remote lid values to placeholders purely by position.
///
/// The n-th entry of the result is the n-th value returned by
/// `extract_lid_values_unanchored(remote)`, or `None` once the remote values run out. A warning
/// is pushed when the number of placeholders and the number of remote values differ.
fn resolve_lid_positional(
    placeholders: &[crate::values::placeholder::Placeholder],
    lid_indices: &[usize],
    remote: &str,
    field: FieldKind,
    warnings: &mut Vec<String>,
) -> Vec<Option<String>> {
    // The placeholder list is accepted but intentionally not consulted here.
    let _ = placeholders;

    let remote_values = extract_lid_values_unanchored(remote);
    let (wanted, available) = (lid_indices.len(), remote_values.len());
    if available != wanted {
        let field_label = match field {
            FieldKind::EmailSubject => "subject",
            FieldKind::EmailPreheader => "preheader",
            _ => "field",
        };
        warnings.push(format!(
            "{field_label} has {} lid placeholder(s) but remote body has {} lid value(s); \
             positional match may misalign — review rendered output",
            wanted, available
        ));
    }

    let mut remaining = remote_values.into_iter();
    lid_indices.iter().map(|_| remaining.next()).collect()
}
/// Resolves every cb_id placeholder by looking up its content-block name in `remote`.
///
/// The returned map is keyed by the placeholder's start offset in `body`. A `None` value means
/// the placeholder could not be correlated, either because no enclosing content-block include
/// was found around it or because the remote body has no include with that name. Each failure
/// also appends a warning.
fn resolve_cb_id_batch(
    body: &str,
    placeholders: &[crate::values::placeholder::Placeholder],
    remote: &str,
    warnings: &mut Vec<String>,
) -> BTreeMap<usize, Option<String>> {
    let remote_pairs = extract_cb_id_values(remote);

    // When a name occurs more than once remotely, the last occurrence wins.
    let mut remote_by_name: BTreeMap<&str, &CbIdCorrelation> = BTreeMap::new();
    for pair in remote_pairs.iter() {
        remote_by_name.insert(pair.name.as_str(), pair);
    }

    let mut resolved: BTreeMap<usize, Option<String>> = BTreeMap::new();
    let cb_id_placeholders = placeholders
        .iter()
        .filter(|ph| ph.ty == Some(PlaceholderType::CbId));
    for ph in cb_id_placeholders {
        let Some(name) = cb_id_name_at(body, ph.start) else {
            warnings.push(format!(
                "cb_id: `__BRAZESYNC__` at byte {} not inside `{{{{content_blocks.${{NAME}} | id: '…'}}}}` — cannot correlate",
                ph.start
            ));
            resolved.insert(ph.start, None);
            continue;
        };

        let value = remote_by_name
            .get(name.as_str())
            .map(|pair| pair.value.clone());
        if value.is_none() {
            warnings.push(format!(
                "cb_id: `${{{name}}}` include not found in remote body"
            ));
        }
        resolved.insert(ph.start, value);
    }
    resolved
}
/// Produces lid values for the placeholders in `lid_indices` without consulting a remote body.
///
/// For each placeholder the base name is the slug of the last path segment of its anchor URL.
/// When there is no anchor, or the slug comes out empty, the base is `lid_<n>` with `n`
/// counting such cases from 1. Every base is then passed through `unique`. The result always
/// contains `Some` for every index.
fn fallback_lid_batch(
    body: &str,
    placeholders: &[crate::values::placeholder::Placeholder],
    lid_indices: &[usize],
    field: FieldKind,
) -> Vec<Option<String>> {
    let mut used: BTreeMap<String, usize> = BTreeMap::new();
    let mut seq = 0usize;

    lid_indices
        .iter()
        .map(|&idx| {
            let slug = lid_anchor_for(body, placeholders[idx].start, field)
                .map(|url| slug_for_lid(&url_path_tail(&url)))
                .filter(|slug| !slug.is_empty());
            // The counter only advances when no usable slug exists.
            let base = slug.unwrap_or_else(|| {
                seq += 1;
                format!("lid_{seq}")
            });
            Some(unique(base, &mut used))
        })
        .collect()
}
/// Returns a name derived from `base` that has not been returned before for this `used` map.
///
/// The first request for a name yields `base` itself; later requests yield `base_2`, `base_3`,
/// and so on. Every name handed out is recorded in `used`, including suffixed ones, and
/// suffixes that are already taken are skipped. That way a generated `a_2` can never coincide
/// with a literal base `a_2`, regardless of which one was requested first.
///
/// `used` maps each name to the highest counter consumed for it so far.
fn unique(base: String, used: &mut BTreeMap<String, usize>) -> String {
    let Some(&seen) = used.get(&base) else {
        // Never issued, neither as a base nor as a generated suffix form.
        used.insert(base.clone(), 1);
        return base;
    };

    // Advance the counter until the suffixed form is free.
    let mut counter = seen;
    let candidate = loop {
        counter += 1;
        let candidate = format!("{base}_{counter}");
        if !used.contains_key(&candidate) {
            break candidate;
        }
    };

    used.insert(base, counter);
    // Reserve the generated name so a later identical base gets its own suffix.
    used.insert(candidate.clone(), 1);
    candidate
}
/// Returns the last non-empty path segment of `url`, or an empty string when the URL has no
/// path segment.
///
/// The query (`?…`) and fragment (`#…`) are removed before anything else is inspected, so a
/// `/` or `://` that appears only inside them is never mistaken for part of the path or for
/// the scheme separator.
fn url_path_tail(url: &str) -> String {
    // Everything from the first `?` or `#` onwards is not part of the path.
    let without_suffix = url.split(['?', '#']).next().unwrap_or("");
    let after_scheme = without_suffix
        .split_once("://")
        .map_or(without_suffix, |(_, rest)| rest);
    // The path starts after the first `/` that follows the host.
    let path = after_scheme.split_once('/').map_or("", |(_, path)| path);
    path.rsplit('/')
        .find(|segment| !segment.is_empty())
        .unwrap_or("")
        .to_string()
}
/// Removes the `| id: '…'` filter from every content-block include matched by
/// `cb_id_filter_re`, leaving a plain `{{content_blocks.${NAME}}}` include behind.
///
/// Returns the rewritten body and one warning per stripped include.
fn strip_cb_id_filters(body: &str) -> (String, Vec<String>) {
    let mut warnings: Vec<String> = Vec::new();
    let mut rewritten = String::with_capacity(body.len());
    // Byte offset in `body` up to which text has already been copied.
    let mut cursor = 0usize;

    for caps in cb_id_filter_re().captures_iter(body) {
        let whole = caps.get(0).expect("group 0 always present");
        let name = caps.get(1).map_or("", |m| m.as_str());
        warnings.push(format!(
            "cb_id `${{{name}}}`: new resource — stripping `| id: '…'` filter; \
             Braze will assign a cb_id on first save"
        ));
        rewritten.push_str(&body[cursor..whole.start()]);
        rewritten.push_str(&format!("{{{{content_blocks.${{{name}}}}}}}"));
        cursor = whole.end();
    }
    rewritten.push_str(&body[cursor..]);

    (rewritten, warnings)
}
/// Lazily compiled regex matching `{{content_blocks.${NAME} | id: '__BRAZESYNC__'}}`, with
/// optional whitespace between the parts and either quote style. Group 1 captures `NAME`.
fn cb_id_filter_re() -> &'static Regex {
    const PATTERN: &str = r#"\{\{\s*content_blocks\.\$\{\s*([^\s}|]+)\s*\}\s*\|\s*id:\s*['"]__BRAZESYNC__['"]\s*\}\}"#;
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| Regex::new(PATTERN).expect("cb_id filter regex is valid"))
}
/// Returns the content-block name of the include that contains byte `offset` in `body`, or
/// `None` when `offset` is not inside any match of `cb_id_template_re`.
fn cb_id_name_at(body: &str, offset: usize) -> Option<String> {
    cb_id_template_re()
        .captures_iter(body)
        .find(|caps| {
            caps.get(0)
                .is_some_and(|whole| whole.range().contains(&offset))
        })
        .and_then(|caps| caps.get(1).map(|name| name.as_str().to_string()))
}
/// Lazily compiled regex locating a content-block include whose `id` filter still holds the
/// `__BRAZESYNC__` token. Group 1 captures the content-block name.
fn cb_id_template_re() -> &'static Regex {
    const PATTERN: &str = r#"\{\{\s*content_blocks\.\$\{\s*([^\s}|]+)\s*\}\s*\|\s*id:\s*['"]__BRAZESYNC__['"]\s*\}\}"#;
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| Regex::new(PATTERN).expect("cb_id template regex is valid"))
}
/// Finds the URL that anchors the placeholder starting at byte `offset` in `body`.
///
/// * HTML-anchored fields: when `offset` lies inside an element's opening tag, the anchor is
///   that tag's first `href` / `src` / `action` attribute value, or `None` if the tag has
///   none. Otherwise it is the `href` of the last `<a …>` that appears before `offset`.
/// * Plaintext-anchored fields: the last `http(s)://` URL that appears before `offset`.
/// * Any other field: `None`.
///
/// The URL is passed through `normalize_url` before being returned.
fn lid_anchor_for(body: &str, offset: usize, field: FieldKind) -> Option<String> {
    if field.supports_html_anchor() {
        return match enclosing_open_tag(body, offset) {
            // Inside a tag: only that tag's own URL attribute counts.
            Some(tag) => url_attr_re()
                .captures(tag)
                .and_then(|caps| caps.get(1).or(caps.get(2)))
                .map(|url| normalize_url(url.as_str())),
            // In element content: fall back to the nearest preceding anchor.
            None => anchor_href_re()
                .captures_iter(&body[..offset])
                .last()
                .and_then(|caps| caps.get(1).or(caps.get(2)))
                .map(|url| normalize_url(url.as_str())),
        };
    }

    if !field.supports_plaintext_anchor() {
        return None;
    }

    plaintext_url_re()
        .find_iter(&body[..offset])
        .last()
        .map(|url| normalize_url(url.as_str()))
}
/// Returns the text of the element opening tag that spans byte `offset`, if there is one.
///
/// Tags are scanned in document order; the scan stops at the first tag that starts after
/// `offset`.
fn enclosing_open_tag(body: &str, offset: usize) -> Option<&str> {
    element_open_tag_re()
        .find_iter(body)
        .take_while(|tag| tag.start() <= offset)
        .find(|tag| tag.end() > offset)
        .map(|tag| &body[tag.start()..tag.end()])
}
/// Lazily compiled, case-insensitive regex for the `href` attribute of an `<a …>` tag.
/// Group 1 holds a double-quoted value, group 2 a single-quoted one.
fn anchor_href_re() -> &'static Regex {
    const PATTERN: &str = r#"(?i)<a\b[^>]*?\bhref\s*=\s*(?:"([^"]*)"|'([^']*)')"#;
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| Regex::new(PATTERN).expect("anchor href regex is valid"))
}
/// Lazily compiled, case-insensitive regex for a whitespace-preceded `href`, `src` or `action`
/// attribute, optionally carrying a `prefix:` in front of the attribute name.
/// Group 1 holds a double-quoted value, group 2 a single-quoted one.
fn url_attr_re() -> &'static Regex {
    const PATTERN: &str =
        r#"(?i)\s(?:[a-z][a-z0-9_-]*:)?(?:href|src|action)\s*=\s*(?:"([^"]*)"|'([^']*)')"#;
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| Regex::new(PATTERN).expect("url attr regex is valid"))
}
/// Lazily compiled regex for a bare `http://` or `https://` URL; the match ends at the first
/// whitespace, angle bracket or quote character.
fn plaintext_url_re() -> &'static Regex {
    const PATTERN: &str = r#"https?://[^\s<>"']+"#;
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| Regex::new(PATTERN).expect("plaintext URL regex is valid"))
}
/// Lazily compiled, case-insensitive regex for an element opening tag: `<`, a name starting
/// with a letter, then everything up to the next `>`.
fn element_open_tag_re() -> &'static Regex {
    const PATTERN: &str = r#"(?i)<[a-z][a-z0-9_.:-]*\b[^>]*>"#;
    static CELL: OnceLock<Regex> = OnceLock::new();
    CELL.get_or_init(|| Regex::new(PATTERN).expect("element open tag regex is valid"))
}
// Unit tests for `prepare_field` and `url_path_tail`.
#[cfg(test)]
mod tests {
use super::*;
// A template without any placeholder token comes back unchanged and error-free.
#[test]
fn no_placeholders_returns_body_verbatim() {
let p = prepare_field("<p>hi</p>", Some("<p>hi</p>"), FieldKind::ContentBlock);
assert_eq!(p.body, "<p>hi</p>");
assert!(p.errors.is_empty());
}
// An lid placeholder inside an <a> takes the remote value found under the same href.
#[test]
fn html_lid_resolved_via_url_anchor() {
let template = r#"<a href="https://example.com/cta">{{x | lid: '__BRAZESYNC__'}}</a>"#;
let remote = r#"<a href="https://example.com/cta">{{x | lid: 'newlidvalue1'}}</a>"#;
let p = prepare_field(template, Some(remote), FieldKind::ContentBlock);
assert!(p.errors.is_empty(), "{:?}", p.errors);
assert!(p.body.contains("'newlidvalue1'"));
}
// Two placeholders anchored to the same URL must each receive their own remote value.
#[test]
fn two_lid_placeholders_sharing_one_url_consume_distinct_remote_values() {
let template = r#"<a href="https://x.com/a">{{x | lid: '__BRAZESYNC__'}}</a>
<a href="https://x.com/a">{{x | lid: '__BRAZESYNC__'}}</a>"#;
let remote = r#"<a href="https://x.com/a">{{x | lid: 'firstvalu1a'}}</a>
<a href="https://x.com/a">{{x | lid: 'secondval2b'}}</a>"#;
let p = prepare_field(template, Some(remote), FieldKind::ContentBlock);
assert!(p.errors.is_empty(), "{:?}", p.errors);
assert!(p.body.contains("'firstvalu1a'"));
assert!(p.body.contains("'secondval2b'"));
}
// A cb_id placeholder is filled from the remote include with the same content-block name.
#[test]
fn cb_id_resolved_via_name() {
let template = "{{content_blocks.${promo_banner} | id: '__BRAZESYNC__'}}";
let remote = "{{content_blocks.${promo_banner} | id: 'cb99'}}";
let p = prepare_field(template, Some(remote), FieldKind::ContentBlock);
assert!(p.errors.is_empty());
assert!(p.body.contains("'cb99'"));
}
// Without a remote body the lid value is derived from the anchor URL's last path segment.
#[test]
fn new_resource_lid_uses_url_slug_fallback() {
let template = r#"<a href="https://x.com/spring-sale">{{x | lid: '__BRAZESYNC__'}}</a>"#;
let p = prepare_field(template, None, FieldKind::ContentBlock);
assert!(p.errors.is_empty());
assert!(p.body.contains("'spring_sale'"), "got: {}", p.body);
}
// Without a remote body and without an anchor, placeholders are numbered lid_1, lid_2, ...
#[test]
fn new_resource_lid_without_anchor_uses_sequential() {
let template = "no anchor {{x | lid: '__BRAZESYNC__'}} mid {{x | lid: '__BRAZESYNC__'}}";
let p = prepare_field(template, None, FieldKind::EmailSubject);
assert!(p.body.contains("'lid_1'"));
assert!(p.body.contains("'lid_2'"));
}
// Without a remote body the `| id: ...` filter is removed and a warning names the block.
#[test]
fn new_resource_strips_cb_id_filter() {
let template = "before {{content_blocks.${promo} | id: '__BRAZESYNC__'}} after";
let p = prepare_field(template, None, FieldKind::ContentBlock);
assert_eq!(p.body, "before {{content_blocks.${promo}}} after");
assert!(p.warnings.iter().any(|w| w.contains("promo")));
}
// A template anchor that has no counterpart in the remote body yields UnresolvedLid.
#[test]
fn lid_without_remote_match_surfaces_error() {
let template = r#"<a href="https://x.com/cta">{{x | lid: '__BRAZESYNC__'}}</a>"#;
let remote = r#"<p>no anchor</p>"#;
let p = prepare_field(template, Some(remote), FieldKind::ContentBlock);
assert!(p
.errors
.iter()
.any(|e| matches!(e, ResolutionError::UnresolvedLid { .. })));
}
// The `__BRAZESYNC.lid.foo__` token form is reported as RetiredNamespace.
#[test]
fn retired_envelope_is_fatal() {
let template = "stuff __BRAZESYNC.lid.foo__ stuff";
let p = prepare_field(template, None, FieldKind::ContentBlock);
assert!(p
.errors
.iter()
.any(|e| matches!(e, ResolutionError::RetiredNamespace { .. })));
}
// A bare token outside any lid / cb_id filter is reported as UnknownContext.
#[test]
fn unknown_context_is_fatal() {
let template = "bare __BRAZESYNC__ token";
let p = prepare_field(template, Some(""), FieldKind::ContentBlock);
assert!(p
.errors
.iter()
.any(|e| matches!(e, ResolutionError::UnknownContext { .. })));
}
// A placeholder inside the href of a namespaced (VML) element is anchored by that href.
#[test]
fn vml_href_anchors_lid() {
let template = r#"<v:roundrect href="https://x.com/page/?lid={{x | lid: '__BRAZESYNC__'}}">label</v:roundrect>"#;
let remote = r#"<v:roundrect href="https://x.com/page/?lid={{x | lid: 'liveeeeeeee1'}}">label</v:roundrect>"#;
let p = prepare_field(template, Some(remote), FieldKind::ContentBlock);
assert!(p.errors.is_empty(), "{:?}", p.errors);
assert!(p.body.contains("'liveeeeeeee1'"));
}
// In a plaintext body the bare URL preceding the placeholder acts as the anchor.
#[test]
fn plaintext_url_anchor_matches() {
let template = "Visit https://x.com/cta {{x | lid: '__BRAZESYNC__'}} now";
let remote = "Visit https://x.com/cta {{x | lid: 'liveeeeeeee1'}} now";
let p = prepare_field(template, Some(remote), FieldKind::EmailPlainBody);
assert!(p.errors.is_empty());
assert!(p.body.contains("'liveeeeeeee1'"));
}
// Subject lines have no URL anchors, so remote values are assigned in order of appearance.
#[test]
fn subject_lid_resolves_positionally() {
let template = "{{x | lid: '__BRAZESYNC__'}} A {{y | lid: '__BRAZESYNC__'}}";
let remote = "{{x | lid: 'firstval123'}} A {{y | lid: 'secondval2b'}}";
let p = prepare_field(template, Some(remote), FieldKind::EmailSubject);
assert!(p.errors.is_empty(), "{:?}", p.errors);
assert!(p.body.contains("'firstval123'"));
assert!(p.body.contains("'secondval2b'"));
}
// The slug fallback also works for a plaintext body with a bare URL anchor.
#[test]
fn new_resource_plaintext_lid_uses_url_slug() {
let template = "Visit https://x.com/spring-sale {{x | lid: '__BRAZESYNC__'}} now";
let p = prepare_field(template, None, FieldKind::EmailPlainBody);
assert!(p.errors.is_empty(), "{:?}", p.errors);
assert!(
p.body.contains("'spring_sale'"),
"plaintext URL slug must be used, got: {}",
p.body
);
}
// `url_path_tail` ignores the query string, the fragment and trailing slashes.
#[test]
fn url_path_tail_strips_query_and_fragment() {
assert_eq!(url_path_tail("https://x.com/page/?utm=1"), "page");
assert_eq!(url_path_tail("https://x.com/page/#section"), "page");
assert_eq!(url_path_tail("https://x.com/page/?a=1#b"), "page");
assert_eq!(url_path_tail("https://x.com/"), "");
assert_eq!(url_path_tail("https://x.com/sale"), "sale");
}
}