1use regex_lite::Regex;
9use std::collections::BTreeMap;
10use std::sync::OnceLock;
11
12use crate::values::correlation::{normalize_url, slug_for_cb_id, slug_for_lid};
13
/// The kind of field a body was taken from.
///
/// `templatize_body` consults this to decide how to look for a URL near a
/// `lid` token, and whether to emit the subject/preheader warning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FieldKind {
    /// A content block body; searched for HTML anchors.
    ContentBlock,
    /// An email HTML body; searched for HTML anchors.
    EmailHtmlBody,
    /// An email plain-text body; searched for bare `http(s)://` URLs.
    EmailPlainBody,
    /// An email subject line; no URL lookup is attempted.
    EmailSubject,
    /// An email preheader; no URL lookup is attempted.
    EmailPreheader,
}

impl FieldKind {
    /// Returns `true` for the field kinds whose URL anchor is an `<a href=…>` tag
    /// (`ContentBlock` and `EmailHtmlBody`).
    pub fn supports_html_anchor(self) -> bool {
        match self {
            FieldKind::ContentBlock | FieldKind::EmailHtmlBody => true,
            FieldKind::EmailPlainBody | FieldKind::EmailSubject | FieldKind::EmailPreheader => {
                false
            }
        }
    }

    /// Returns `true` only for `EmailPlainBody`, where the URL anchor is a bare
    /// URL in the text.
    pub fn supports_plaintext_anchor(self) -> bool {
        self == FieldKind::EmailPlainBody
    }
}
35
/// One raw value found in a body and replaced by a placeholder.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DetectedEntry {
    /// A `| lid: '…'` filter argument.
    Lid {
        /// Key embedded in the `__BRAZESYNC.lid.<key>__` placeholder.
        key: String,
        /// The raw lid value that was captured from the body.
        value: String,
        /// The URL the key was derived from, when one was found near the token.
        url: Option<String>,
    },
    /// A `{{content_blocks.${NAME} | id: 'cbN'}}` include.
    CbId {
        /// Key embedded in the `__BRAZESYNC.cb_id.<key>__` placeholder.
        key: String,
        /// The raw `cbN` id that was captured from the body.
        value: String,
        /// The `NAME` captured from `${NAME}`.
        name: String,
    },
}

impl DetectedEntry {
    /// Returns the placeholder key, whichever variant this is.
    pub fn key(&self) -> &str {
        match self {
            DetectedEntry::Lid { key, .. } => key.as_str(),
            DetectedEntry::CbId { key, .. } => key.as_str(),
        }
    }
}
64
65#[derive(Debug, Clone)]
67pub struct TemplatizedField {
68 pub new_body: String,
69 pub entries: Vec<DetectedEntry>,
70 pub warnings: Vec<String>,
73}
74
75pub fn templatize_body(body: &str, field: FieldKind) -> TemplatizedField {
85 let mut spans: Vec<DetectionSpan> = Vec::new();
86 let mut used_lid_keys: BTreeMap<String, usize> = BTreeMap::new();
89 let mut used_cb_id_keys: BTreeMap<String, usize> = BTreeMap::new();
90 let mut cb_id_name_to_key: BTreeMap<String, String> = BTreeMap::new();
95 let mut warnings: Vec<String> = Vec::new();
96
97 for m in lid_match_re().captures_iter(body) {
99 let whole = m.get(0).expect("group 0 always present");
100 let value = m
101 .get(1)
102 .or(m.get(2))
103 .map(|g| g.as_str().to_string())
104 .expect("one of the value alternates matches");
105
106 let (url, key) = name_lid_for_field(body, whole.start(), field, &mut used_lid_keys);
107 if url.is_none() && !matches!(field, FieldKind::EmailSubject | FieldKind::EmailPreheader) {
108 warnings.push(format!(
109 "lid '{value}' at byte {} has no URL anchor; using sequential key '{key}'",
110 whole.start()
111 ));
112 }
113 if matches!(field, FieldKind::EmailSubject | FieldKind::EmailPreheader) {
114 warnings.push(format!(
120 "lid '{value}' detected in subject/preheader (key '{key}'); \
121 `export` does not refresh these — non-canonical env \
122 values files must be edited manually"
123 ));
124 }
125 spans.push(DetectionSpan {
126 range: whole.range(),
127 replacement: format!("| lid: '__BRAZESYNC.lid.{key}__'"),
128 entry: DetectedEntry::Lid { key, value, url },
129 });
130 }
131
132 for m in cb_id_match_re().captures_iter(body) {
134 let whole = m.get(0).expect("group 0 always present");
135 let name = m.get(1).expect("name capture present").as_str().to_string();
136 let value = m
137 .get(2)
138 .or(m.get(3))
139 .map(|g| g.as_str().to_string())
140 .expect("cbN capture present");
141 let key = match cb_id_name_to_key.get(&name) {
144 Some(prior) => prior.clone(),
145 None => {
146 let k = unique_key(slug_for_cb_id(&name), &mut used_cb_id_keys);
147 cb_id_name_to_key.insert(name.clone(), k.clone());
148 k
149 }
150 };
151 let replacement =
154 format!("{{{{content_blocks.${{{name}}} | id: '__BRAZESYNC.cb_id.{key}__'}}}}");
155 spans.push(DetectionSpan {
156 range: whole.range(),
157 replacement,
158 entry: DetectedEntry::CbId { key, value, name },
159 });
160 }
161
162 spans.sort_by_key(|s| s.range.start);
164 let mut new_body = body.to_string();
165 let mut entries_in_order: Vec<DetectedEntry> = Vec::with_capacity(spans.len());
166 for s in &spans {
167 entries_in_order.push(s.entry.clone());
168 }
169 for s in spans.into_iter().rev() {
170 new_body.replace_range(s.range, &s.replacement);
171 }
172
173 TemplatizedField {
174 new_body,
175 entries: entries_in_order,
176 warnings,
177 }
178}
179
180struct DetectionSpan {
181 range: std::ops::Range<usize>,
182 replacement: String,
183 entry: DetectedEntry,
184}
185
186fn lid_match_re() -> &'static Regex {
187 static RE: OnceLock<Regex> = OnceLock::new();
188 RE.get_or_init(|| {
189 Regex::new(r#"\|\s*lid:\s*(?:"([a-z0-9]{8,})"|'([a-z0-9]{8,})')"#)
191 .expect("lid match regex is valid")
192 })
193}
194
195fn cb_id_match_re() -> &'static Regex {
196 static RE: OnceLock<Regex> = OnceLock::new();
197 RE.get_or_init(|| {
198 Regex::new(
199 r#"\{\{\s*content_blocks\.\$\{\s*([^\s}|]+)\s*\}\s*\|\s*id:\s*(?:"(cb[0-9]+)"|'(cb[0-9]+)')\s*\}\}"#,
200 )
201 .expect("cb_id match regex is valid")
202 })
203}
204
205fn href_re() -> &'static Regex {
206 static RE: OnceLock<Regex> = OnceLock::new();
207 RE.get_or_init(|| {
208 Regex::new(r#"(?i)<a\b[^>]*?\bhref\s*=\s*(?:"([^"]*)"|'([^']*)')"#)
209 .expect("href regex is valid")
210 })
211}
212
213fn plaintext_url_re() -> &'static Regex {
214 static RE: OnceLock<Regex> = OnceLock::new();
215 RE.get_or_init(|| Regex::new(r#"https?://[^\s<>"']+"#).expect("plaintext URL regex is valid"))
216}
217
218fn name_lid_for_field(
219 body: &str,
220 lid_token_offset: usize,
221 field: FieldKind,
222 used: &mut BTreeMap<String, usize>,
223) -> (Option<String>, String) {
224 let url = preceding_url(body, lid_token_offset, field);
225 let key_source: String = match &url {
226 Some(u) => url_path_tail(u).to_string(),
227 None => String::new(),
228 };
229 let slug = slug_for_lid(&key_source);
230 let key = unique_key(slug, used);
231 (url, key)
232}
233
234fn preceding_url(body: &str, lid_token_offset: usize, field: FieldKind) -> Option<String> {
235 let raw = if field.supports_html_anchor() {
236 enclosing_anchor_href(body, lid_token_offset).or_else(|| {
243 let prefix = &body[..lid_token_offset];
244 href_re()
245 .captures_iter(prefix)
246 .last()
247 .and_then(|cap| cap.get(1).or(cap.get(2)))
248 .map(|m| m.as_str().to_string())
249 })
250 } else if field.supports_plaintext_anchor() {
251 let prefix = &body[..lid_token_offset];
252 plaintext_url_re()
253 .find_iter(prefix)
254 .last()
255 .map(|m| m.as_str().to_string())
256 } else {
257 None
258 };
259 raw.map(|r| normalize_url(&r))
260}
261
262fn enclosing_anchor_href(body: &str, lid_token_offset: usize) -> Option<String> {
271 let re = anchor_open_tag_re();
272 for m in re.find_iter(body) {
273 if m.start() > lid_token_offset {
274 break;
275 }
276 if m.end() > lid_token_offset {
277 let tag = &body[m.start()..m.end()];
278 return href_re()
279 .captures(tag)
280 .and_then(|cap| cap.get(1).or(cap.get(2)))
281 .map(|x| x.as_str().to_string());
282 }
283 }
284 None
285}
286
287fn anchor_open_tag_re() -> &'static Regex {
288 static RE: OnceLock<Regex> = OnceLock::new();
289 RE.get_or_init(|| Regex::new(r#"(?i)<a\b[^>]*>"#).expect("anchor open tag regex is valid"))
290}
291
/// Returns the last non-empty `/`-separated segment of the URL's path.
///
/// Everything up to and including the first `://` is dropped, then everything
/// up to and including the next `/`; what remains is treated as the path.
/// Returns an empty string when there is no path or it has no non-empty segment.
fn url_path_tail(url: &str) -> String {
    let without_scheme = match url.split_once("://") {
        Some((_, rest)) => rest,
        None => url,
    };
    let path = match without_scheme.find('/') {
        Some(slash) => &without_scheme[slash + 1..],
        None => "",
    };
    path.rsplit('/')
        .find(|segment| !segment.is_empty())
        .map(String::from)
        .unwrap_or_default()
}
308
/// Returns a key derived from `base` that has not been returned before for
/// this `used` map, and records it.
///
/// The first request for a base returns it unchanged; later requests return
/// `base_2`, `base_3`, … A candidate that is already taken (for example
/// because an earlier base was literally `cta_2`) is skipped, and every
/// returned key is registered so that a later base equal to a generated key
/// cannot reuse it.
///
/// `used` maps each key to the highest numeric suffix handed out for it
/// (`1` meaning only the bare key has been used).
fn unique_key(base: String, used: &mut BTreeMap<String, usize>) -> String {
    let Some(&last_suffix) = used.get(&base) else {
        used.insert(base.clone(), 1);
        return base;
    };

    let mut suffix = last_suffix;
    let candidate = loop {
        suffix += 1;
        let candidate = format!("{base}_{suffix}");
        if !used.contains_key(&candidate) {
            break candidate;
        }
    };

    // Remember where to resume for this base, and reserve the generated key
    // itself so a later identical base is disambiguated rather than repeated.
    used.insert(base, suffix);
    used.insert(candidate.clone(), 1);
    candidate
}
318
#[cfg(test)]
mod tests {
    use super::*;

    /// A body that only contains a placeholder has nothing left to rewrite.
    #[test]
    fn idempotent_on_already_templatized_body() {
        let input = "<p>__BRAZESYNC.lid.cta__ kept verbatim</p>";
        let out = templatize_body(input, FieldKind::ContentBlock);
        assert_eq!(out.new_body, input);
        assert!(out.entries.is_empty());
    }

    /// A lid inside an anchor is keyed from the anchor's URL tail.
    #[test]
    fn rewrites_html_lid_with_url_anchor() {
        let input = r#"<a href="https://example.com/spring-sale">{{x | lid: 'ai8kexrxcp03'}}</a>"#;
        let out = templatize_body(input, FieldKind::ContentBlock);
        assert!(out.new_body.contains("__BRAZESYNC.lid.spring_sale__"));
        assert_eq!(out.entries.len(), 1);
        let DetectedEntry::Lid { key, value, url } = &out.entries[0] else {
            panic!("expected Lid")
        };
        assert_eq!(key, "spring_sale");
        assert_eq!(value, "ai8kexrxcp03");
        assert_eq!(url.as_deref(), Some("https://example.com/spring-sale"));
    }

    /// A content-block include keeps its `${NAME}` and gains a placeholder id.
    #[test]
    fn rewrites_cb_id_include() {
        let input = "{{content_blocks.${promo_banner} | id: 'cb42'}}";
        let out = templatize_body(input, FieldKind::ContentBlock);
        assert!(out.new_body.contains("__BRAZESYNC.cb_id.promo_banner__"));
        assert!(out.new_body.contains("${promo_banner}"));
        assert_eq!(out.entries.len(), 1);
    }

    /// Two lids anchored to the same URL get `cta` and `cta_2`.
    #[test]
    fn dedupes_duplicate_url_with_sequential_suffix() {
        let input = r#"
<a href="https://example.com/cta">{{x | lid: 'ai8kexrxcp03'}}A</a>
<a href="https://example.com/cta">{{x | lid: 'bj9lfsysxq14'}}B</a>"#;
        let out = templatize_body(input, FieldKind::ContentBlock);
        let keys: Vec<&str> = out.entries.iter().map(|entry| entry.key()).collect();
        assert_eq!(keys, ["cta", "cta_2"]);
    }

    /// In a plain-text body the preceding bare URL supplies the key.
    #[test]
    fn plaintext_url_anchor_works() {
        let input = "Click https://example.com/promo {{x | lid: 'ai8kexrxcp03'}} now.";
        let out = templatize_body(input, FieldKind::EmailPlainBody);
        let DetectedEntry::Lid { key, url, .. } = &out.entries[0] else {
            panic!()
        };
        assert_eq!(key, "promo");
        assert_eq!(url.as_deref(), Some("https://example.com/promo"));
    }

    /// A lid in a subject line gets the export warning and no URL.
    #[test]
    fn subject_lid_warns_about_export_refresh_gap() {
        let input = "Hello {{x | lid: 'ai8kexrxcp03'}} world";
        let out = templatize_body(input, FieldKind::EmailSubject);
        let has_export_warning = out
            .warnings
            .iter()
            .any(|w| w.contains("export") && w.contains("subject"));
        assert!(
            has_export_warning,
            "expected manual-maintenance warning, got: {:?}",
            out.warnings
        );
        let DetectedEntry::Lid { key, url, .. } = &out.entries[0] else {
            panic!()
        };
        assert_eq!(key, "link_");
        assert!(url.is_none());
    }

    /// Two includes of the same block name share one key.
    #[test]
    fn repeated_cb_id_name_reuses_key() {
        let input = "{{content_blocks.${promo} | id: 'cb10'}} ... \
                     {{content_blocks.${promo} | id: 'cb10'}}";
        let out = templatize_body(input, FieldKind::ContentBlock);
        assert_eq!(out.entries.len(), 2, "both occurrences detected");
        assert_eq!(out.entries[0].key(), "promo");
        assert_eq!(
            out.entries[1].key(),
            "promo",
            "same ${{NAME}} must reuse the key"
        );
    }

    /// An existing placeholder is left alone while a raw lid next to it is detected.
    #[test]
    fn partially_templatized_body_picks_up_remaining_raw_lid() {
        let input = r#"
<a href="https://example.com/cta">{{ x | lid: '__BRAZESYNC.lid.cta__' }}A</a>
<a href="https://example.com/promo">{{ x | lid: 'rawvalue1234' }}B</a>"#;
        let out = templatize_body(input, FieldKind::ContentBlock);
        assert_eq!(out.entries.len(), 1, "the raw lid must be detected");
        let DetectedEntry::Lid { key, value, .. } = &out.entries[0] else {
            panic!("expected Lid")
        };
        assert_eq!(key, "promo");
        assert_eq!(value, "rawvalue1234");
    }

    /// An HTML-body lid with no href anywhere before it produces a warning.
    #[test]
    fn html_lid_without_anchor_warns() {
        let input = "{{x | lid: 'ai8kexrxcp03'}} just floating";
        let out = templatize_body(input, FieldKind::EmailHtmlBody);
        assert_eq!(out.entries.len(), 1);
        assert!(!out.warnings.is_empty());
    }

    /// A lid written inside the href value itself is anchored to that href.
    #[test]
    fn lid_inside_href_attribute_value_uses_enclosing_anchor() {
        let input = r#"<a href="https://med.example.com/product/jaypirca/50mg/?lid={{${cblid} | lid: 'ai8kexrxcp03'}}"><img src="x"/></a>"#;
        let out = templatize_body(input, FieldKind::ContentBlock);
        assert_eq!(out.entries.len(), 1);
        let DetectedEntry::Lid { key, url, .. } = &out.entries[0] else {
            panic!("expected Lid")
        };
        assert_eq!(key, "link_50mg");
        assert_eq!(
            url.as_deref(),
            Some("https://med.example.com/product/jaypirca/50mg/")
        );
        assert!(
            out.warnings.is_empty(),
            "no-anchor warning should not fire when href encloses the lid"
        );
    }

    /// The tag containing the lid wins over an earlier, unrelated anchor.
    #[test]
    fn enclosing_anchor_takes_precedence_over_earlier_unrelated_href() {
        let input = r#"<a href="https://example.com/old">old</a> then <a href="https://example.com/new/path/?lid={{x | lid: 'ai8kexrxcp03'}}">new</a>"#;
        let out = templatize_body(input, FieldKind::ContentBlock);
        let DetectedEntry::Lid { url, .. } = &out.entries[0] else {
            panic!()
        };
        assert_eq!(url.as_deref(), Some("https://example.com/new/path/"));
    }

    /// With no href on the surrounding anchor, the last earlier href is used.
    #[test]
    fn enclosing_anchor_without_href_falls_back_to_prior_href() {
        let input = r#"<a href="https://example.com/earlier/path">x</a> <a name="anchor">text {{x | lid: 'ai8kexrxcp03'}}</a>"#;
        let out = templatize_body(input, FieldKind::ContentBlock);
        assert_eq!(out.entries.len(), 1);
        let DetectedEntry::Lid { url, .. } = &out.entries[0] else {
            panic!("expected Lid")
        };
        assert_eq!(url.as_deref(), Some("https://example.com/earlier/path"));
    }

    /// `url_path_tail` yields the last non-empty path segment, or "" without one.
    #[test]
    fn url_path_tail_uses_last_nonempty_segment() {
        assert_eq!(
            url_path_tail("https://example.com/promo/spring-sale"),
            "spring-sale"
        );
        assert_eq!(url_path_tail("https://example.com/"), "");
        assert_eq!(url_path_tail("https://example.com"), "");
    }
}