1use regex_lite::Regex;
9use std::collections::BTreeMap;
10use std::sync::OnceLock;
11
12use crate::values::correlation::{normalize_url, slug_for_cb_id, slug_for_lid};
13
/// Kind of field a body belongs to.
///
/// `templatize_body` uses this to decide how the URL anchor for a detected
/// `lid` is located and which warnings are emitted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FieldKind {
    /// Reports `supports_html_anchor() == true`.
    ContentBlock,
    /// Reports `supports_html_anchor() == true`.
    EmailHtmlBody,
    /// Reports `supports_plaintext_anchor() == true`.
    EmailPlainBody,
    /// Supports neither anchor style; `templatize_body` adds a
    /// subject/preheader warning for every lid found here.
    EmailSubject,
    /// Supports neither anchor style; `templatize_body` adds a
    /// subject/preheader warning for every lid found here.
    EmailPreheader,
}
26
27impl FieldKind {
28 pub fn supports_html_anchor(self) -> bool {
29 matches!(self, FieldKind::ContentBlock | FieldKind::EmailHtmlBody)
30 }
31 pub fn supports_plaintext_anchor(self) -> bool {
32 matches!(self, FieldKind::EmailPlainBody)
33 }
34}
35
/// One literal found in a body and replaced with a `__BRAZESYNC…__`
/// placeholder by `templatize_body`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DetectedEntry {
    /// A `| lid: '…'` filter argument.
    Lid {
        /// Key embedded in the `__BRAZESYNC.lid.{key}__` placeholder.
        key: String,
        /// The quoted lid value captured from the body.
        value: String,
        /// Normalized URL the lid was anchored to, or `None` when no anchor
        /// was found.
        url: Option<String>,
    },
    /// A `{{content_blocks.${NAME} | id: 'cbN'}}` include.
    CbId {
        /// Key embedded in the `__BRAZESYNC.cb_id.{key}__` placeholder.
        key: String,
        /// The quoted `cbN` value captured from the body.
        value: String,
        /// The `NAME` captured from `${NAME}`.
        name: String,
    },
}
56
57impl DetectedEntry {
58 pub fn key(&self) -> &str {
59 match self {
60 DetectedEntry::Lid { key, .. } | DetectedEntry::CbId { key, .. } => key,
61 }
62 }
63}
64
/// Result of running `templatize_body` over a single field.
#[derive(Debug, Clone)]
pub struct TemplatizedField {
    /// The input body with every detected literal replaced by its placeholder.
    pub new_body: String,
    /// The detections, ordered by their start offset in the original body.
    pub entries: Vec<DetectedEntry>,
    /// Human-readable notes collected while detecting lids.
    pub warnings: Vec<String>,
}
74
75pub fn templatize_body(body: &str, field: FieldKind) -> TemplatizedField {
85 let mut spans: Vec<DetectionSpan> = Vec::new();
86 let mut used_lid_keys: BTreeMap<String, usize> = BTreeMap::new();
89 let mut used_cb_id_keys: BTreeMap<String, usize> = BTreeMap::new();
90 let mut cb_id_name_to_key: BTreeMap<String, String> = BTreeMap::new();
95 let mut warnings: Vec<String> = Vec::new();
96
97 for m in lid_match_re().captures_iter(body) {
99 let whole = m.get(0).expect("group 0 always present");
100 let value = m
101 .get(1)
102 .or(m.get(2))
103 .map(|g| g.as_str().to_string())
104 .expect("one of the value alternates matches");
105
106 let (url, key) = name_lid_for_field(body, whole.start(), field, &mut used_lid_keys);
107 if url.is_none() && !matches!(field, FieldKind::EmailSubject | FieldKind::EmailPreheader) {
108 warnings.push(format!(
109 "lid '{value}' at byte {} has no URL anchor; using sequential key '{key}'",
110 whole.start()
111 ));
112 }
113 if matches!(field, FieldKind::EmailSubject | FieldKind::EmailPreheader) {
114 warnings.push(format!(
120 "lid '{value}' detected in subject/preheader (key '{key}'); \
121 `export` does not refresh these — non-canonical env \
122 values files must be edited manually"
123 ));
124 }
125 spans.push(DetectionSpan {
126 range: whole.range(),
127 replacement: format!("| lid: '__BRAZESYNC.lid.{key}__'"),
128 entry: DetectedEntry::Lid { key, value, url },
129 });
130 }
131
132 for m in cb_id_match_re().captures_iter(body) {
134 let whole = m.get(0).expect("group 0 always present");
135 let name = m.get(1).expect("name capture present").as_str().to_string();
136 let value = m
137 .get(2)
138 .or(m.get(3))
139 .map(|g| g.as_str().to_string())
140 .expect("cbN capture present");
141 let key = match cb_id_name_to_key.get(&name) {
144 Some(prior) => prior.clone(),
145 None => {
146 let k = unique_key(slug_for_cb_id(&name), &mut used_cb_id_keys);
147 cb_id_name_to_key.insert(name.clone(), k.clone());
148 k
149 }
150 };
151 let replacement =
154 format!("{{{{content_blocks.${{{name}}} | id: '__BRAZESYNC.cb_id.{key}__'}}}}");
155 spans.push(DetectionSpan {
156 range: whole.range(),
157 replacement,
158 entry: DetectedEntry::CbId { key, value, name },
159 });
160 }
161
162 spans.sort_by_key(|s| s.range.start);
164 let mut new_body = body.to_string();
165 let mut entries_in_order: Vec<DetectedEntry> = Vec::with_capacity(spans.len());
166 for s in &spans {
167 entries_in_order.push(s.entry.clone());
168 }
169 for s in spans.into_iter().rev() {
170 new_body.replace_range(s.range, &s.replacement);
171 }
172
173 TemplatizedField {
174 new_body,
175 entries: entries_in_order,
176 warnings,
177 }
178}
179
/// A single pending replacement collected by `templatize_body`.
struct DetectionSpan {
    /// Byte range of the matched text in the original body.
    range: std::ops::Range<usize>,
    /// Text that replaces the matched range.
    replacement: String,
    /// The entry reported to the caller for this match.
    entry: DetectedEntry,
}
185
186fn lid_match_re() -> &'static Regex {
187 static RE: OnceLock<Regex> = OnceLock::new();
188 RE.get_or_init(|| {
189 Regex::new(r#"\|\s*lid:\s*(?:"([a-z0-9]{8,})"|'([a-z0-9]{8,})')"#)
191 .expect("lid match regex is valid")
192 })
193}
194
195fn cb_id_match_re() -> &'static Regex {
196 static RE: OnceLock<Regex> = OnceLock::new();
197 RE.get_or_init(|| {
198 Regex::new(
199 r#"\{\{\s*content_blocks\.\$\{\s*([^\s}|]+)\s*\}\s*\|\s*id:\s*(?:"(cb[0-9]+)"|'(cb[0-9]+)')\s*\}\}"#,
200 )
201 .expect("cb_id match regex is valid")
202 })
203}
204
205fn anchor_href_re() -> &'static Regex {
210 static RE: OnceLock<Regex> = OnceLock::new();
211 RE.get_or_init(|| {
212 Regex::new(r#"(?i)<a\b[^>]*?\bhref\s*=\s*(?:"([^"]*)"|'([^']*)')"#)
213 .expect("anchor href regex is valid")
214 })
215}
216
217fn url_attr_re() -> &'static Regex {
225 static RE: OnceLock<Regex> = OnceLock::new();
226 RE.get_or_init(|| {
227 Regex::new(
228 r#"(?i)\s(?:[a-z][a-z0-9_-]*:)?(?:href|src|action)\s*=\s*(?:"([^"]*)"|'([^']*)')"#,
229 )
230 .expect("url attr regex is valid")
231 })
232}
233
234fn plaintext_url_re() -> &'static Regex {
235 static RE: OnceLock<Regex> = OnceLock::new();
236 RE.get_or_init(|| Regex::new(r#"https?://[^\s<>"']+"#).expect("plaintext URL regex is valid"))
237}
238
239fn name_lid_for_field(
240 body: &str,
241 lid_token_offset: usize,
242 field: FieldKind,
243 used: &mut BTreeMap<String, usize>,
244) -> (Option<String>, String) {
245 let url = preceding_url(body, lid_token_offset, field);
246 let key_source: String = match &url {
247 Some(u) => url_path_tail(u).to_string(),
248 None => String::new(),
249 };
250 let slug = slug_for_lid(&key_source);
251 let key = unique_key(slug, used);
252 (url, key)
253}
254
255fn preceding_url(body: &str, lid_token_offset: usize, field: FieldKind) -> Option<String> {
256 let raw = if field.supports_html_anchor() {
257 match enclosing_open_tag(body, lid_token_offset) {
263 Some(tag) => url_attr_re()
264 .captures(tag)
265 .and_then(|cap| cap.get(1).or(cap.get(2)))
266 .map(|x| x.as_str().to_string()),
267 None => {
268 let prefix = &body[..lid_token_offset];
269 anchor_href_re()
270 .captures_iter(prefix)
271 .last()
272 .and_then(|cap| cap.get(1).or(cap.get(2)))
273 .map(|m| m.as_str().to_string())
274 }
275 }
276 } else if field.supports_plaintext_anchor() {
277 let prefix = &body[..lid_token_offset];
278 plaintext_url_re()
279 .find_iter(prefix)
280 .last()
281 .map(|m| m.as_str().to_string())
282 } else {
283 None
284 };
285 raw.map(|r| normalize_url(&r))
286}
287
288fn enclosing_open_tag(body: &str, lid_token_offset: usize) -> Option<&str> {
294 let re = element_open_tag_re();
295 for m in re.find_iter(body) {
296 if m.start() > lid_token_offset {
297 break;
298 }
299 if m.end() > lid_token_offset {
300 return Some(&body[m.start()..m.end()]);
301 }
302 }
303 None
304}
305
306fn element_open_tag_re() -> &'static Regex {
307 static RE: OnceLock<Regex> = OnceLock::new();
308 RE.get_or_init(|| {
312 Regex::new(r#"(?i)<[a-z][a-z0-9_.:-]*\b[^>]*>"#).expect("element open tag regex is valid")
313 })
314}
315
/// Returns the last non-empty `/`-separated segment of the path of `url`.
///
/// Anything up to and including the first `://` is dropped, then everything
/// up to and including the next `/` (the host part). The result is the empty
/// string when no path or no non-empty segment remains.
fn url_path_tail(url: &str) -> String {
    let without_scheme = match url.split_once("://") {
        Some((_, remainder)) => remainder,
        None => url,
    };
    let path = match without_scheme.find('/') {
        Some(slash) => &without_scheme[slash + 1..],
        None => "",
    };
    path.split('/')
        .rev()
        .find(|segment| !segment.is_empty())
        .map_or_else(String::new, str::to_owned)
}
332
/// Returns a key based on `base` that has not been handed out before.
///
/// The first request for a `base` returns it unchanged. Later requests return
/// `{base}_{n}` with `n` counting up from 2, skipping any candidate that is
/// already taken.
///
/// `used` records every key this function has returned. For a base it stores
/// the highest suffix tried so far, so repeated requests do not rescan from 2.
/// Registering the suffixed keys as well prevents a generated `cta_2` from
/// colliding with a different base that is literally `cta_2`.
fn unique_key(base: String, used: &mut BTreeMap<String, usize>) -> String {
    let Some(&last_suffix) = used.get(&base) else {
        used.insert(base.clone(), 1);
        return base;
    };

    let mut suffix = last_suffix;
    let candidate = loop {
        suffix += 1;
        let candidate = format!("{base}_{suffix}");
        if !used.contains_key(&candidate) {
            break candidate;
        }
    };

    used.insert(base, suffix);
    used.insert(candidate.clone(), 1);
    candidate
}
342
#[cfg(test)]
mod tests {
    use super::*;

    // A body that only contains a placeholder has nothing left to detect.
    #[test]
    fn idempotent_on_already_templatized_body() {
        let body = "<p>__BRAZESYNC.lid.cta__ kept verbatim</p>";
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.new_body, body);
        assert!(r.entries.is_empty());
    }

    // A lid in the anchor's content is keyed from the preceding <a href>.
    #[test]
    fn rewrites_html_lid_with_url_anchor() {
        let body = r#"<a href="https://example.com/spring-sale">{{x | lid: 'ai8kexrxcp03'}}</a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert!(r.new_body.contains("__BRAZESYNC.lid.spring_sale__"));
        assert_eq!(r.entries.len(), 1);
        match &r.entries[0] {
            DetectedEntry::Lid { key, value, url } => {
                assert_eq!(key, "spring_sale");
                assert_eq!(value, "ai8kexrxcp03");
                assert_eq!(url.as_deref(), Some("https://example.com/spring-sale"));
            }
            _ => panic!("expected Lid"),
        }
    }

    // The include keeps its ${NAME}; only the id literal becomes a placeholder.
    #[test]
    fn rewrites_cb_id_include() {
        let body = "{{content_blocks.${promo_banner} | id: 'cb42'}}";
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert!(r.new_body.contains("__BRAZESYNC.cb_id.promo_banner__"));
        assert!(r.new_body.contains("${promo_banner}"));
        assert_eq!(r.entries.len(), 1);
    }

    // Two lids anchored to the same URL get distinct keys via a numeric suffix.
    #[test]
    fn dedupes_duplicate_url_with_sequential_suffix() {
        let body = r#"
<a href="https://example.com/cta">{{x | lid: 'ai8kexrxcp03'}}A</a>
<a href="https://example.com/cta">{{x | lid: 'bj9lfsysxq14'}}B</a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        let keys: Vec<&str> = r.entries.iter().map(DetectedEntry::key).collect();
        assert_eq!(keys, ["cta", "cta_2"]);
    }

    // In a plain-text body the last bare URL before the lid is the anchor.
    #[test]
    fn plaintext_url_anchor_works() {
        let body = "Click https://example.com/promo {{x | lid: 'ai8kexrxcp03'}} now.";
        let r = templatize_body(body, FieldKind::EmailPlainBody);
        match &r.entries[0] {
            DetectedEntry::Lid { key, url, .. } => {
                assert_eq!(key, "promo");
                assert_eq!(url.as_deref(), Some("https://example.com/promo"));
            }
            _ => panic!(),
        }
    }

    // Subject fields have no anchor lookup: the key is the bare fallback and a
    // manual-maintenance warning is emitted.
    #[test]
    fn subject_lid_warns_about_export_refresh_gap() {
        let body = "Hello {{x | lid: 'ai8kexrxcp03'}} world";
        let r = templatize_body(body, FieldKind::EmailSubject);
        assert!(
            r.warnings
                .iter()
                .any(|w| w.contains("export") && w.contains("subject")),
            "expected manual-maintenance warning, got: {:?}",
            r.warnings
        );
        match &r.entries[0] {
            DetectedEntry::Lid { key, url, .. } => {
                assert_eq!(key, "link_");
                assert!(url.is_none());
            }
            _ => panic!(),
        }
    }

    // The same ${NAME} appearing twice yields two entries sharing one key.
    #[test]
    fn repeated_cb_id_name_reuses_key() {
        let body = "{{content_blocks.${promo} | id: 'cb10'}} ... \
                    {{content_blocks.${promo} | id: 'cb10'}}";
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 2, "both occurrences detected");
        assert_eq!(r.entries[0].key(), "promo");
        assert_eq!(
            r.entries[1].key(),
            "promo",
            "same ${{NAME}} must reuse the key"
        );
    }

    // An existing placeholder is skipped; the remaining raw lid is still found.
    #[test]
    fn partially_templatized_body_picks_up_remaining_raw_lid() {
        let body = r#"
<a href="https://example.com/cta">{{ x | lid: '__BRAZESYNC.lid.cta__' }}A</a>
<a href="https://example.com/promo">{{ x | lid: 'rawvalue1234' }}B</a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 1, "the raw lid must be detected");
        match &r.entries[0] {
            DetectedEntry::Lid { key, value, .. } => {
                assert_eq!(key, "promo");
                assert_eq!(value, "rawvalue1234");
            }
            _ => panic!("expected Lid"),
        }
    }

    // An HTML lid with no anchor at all is still detected, with a warning.
    #[test]
    fn html_lid_without_anchor_warns() {
        let body = "{{x | lid: 'ai8kexrxcp03'}} just floating";
        let r = templatize_body(body, FieldKind::EmailHtmlBody);
        assert_eq!(r.entries.len(), 1);
        assert!(!r.warnings.is_empty());
    }

    // A lid embedded in the href value itself is anchored to that same tag.
    #[test]
    fn lid_inside_href_attribute_value_uses_enclosing_anchor() {
        let body = r#"<a href="https://med.example.com/product/jaypirca/50mg/?lid={{${cblid} | lid: 'ai8kexrxcp03'}}"><img src="x"/></a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 1);
        match &r.entries[0] {
            DetectedEntry::Lid { key, url, .. } => {
                assert_eq!(key, "link_50mg");
                assert_eq!(
                    url.as_deref(),
                    Some("https://med.example.com/product/jaypirca/50mg/")
                );
            }
            _ => panic!("expected Lid"),
        }
        assert!(
            r.warnings.is_empty(),
            "no-anchor warning should not fire when href encloses the lid"
        );
    }

    // The tag enclosing the lid wins over an earlier, unrelated anchor.
    #[test]
    fn enclosing_anchor_takes_precedence_over_earlier_unrelated_href() {
        let body = r#"<a href="https://example.com/old">old</a> then <a href="https://example.com/new/path/?lid={{x | lid: 'ai8kexrxcp03'}}">new</a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        match &r.entries[0] {
            DetectedEntry::Lid { url, .. } => {
                assert_eq!(url.as_deref(), Some("https://example.com/new/path/"));
            }
            _ => panic!(),
        }
    }

    // The lid sits in element content (not inside an open tag), so the last
    // <a href> before it is used even though the nearest <a> has no href.
    #[test]
    fn enclosing_anchor_without_href_falls_back_to_prior_href() {
        let body = r#"<a href="https://example.com/earlier/path">x</a> <a name="anchor">text {{x | lid: 'ai8kexrxcp03'}}</a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 1);
        match &r.entries[0] {
            DetectedEntry::Lid { url, .. } => {
                assert_eq!(url.as_deref(), Some("https://example.com/earlier/path"));
            }
            _ => panic!("expected Lid"),
        }
    }

    // A non-<a> element (VML roundrect) with an href anchors an enclosed lid.
    #[test]
    fn vml_roundrect_href_anchors_lid() {
        let body = r#"<v:roundrect xmlns:v="urn:schemas-microsoft-com:vml" href="https://hokto.example.com/page/?lid={{${cblid} | lid: 'ulab324mjv2a'}}" style="…"></v:roundrect>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 1);
        match &r.entries[0] {
            DetectedEntry::Lid { key, url, value } => {
                assert_eq!(value, "ulab324mjv2a");
                assert_eq!(url.as_deref(), Some("https://hokto.example.com/page/"));
                assert_eq!(key, "page");
            }
            _ => panic!("expected Lid"),
        }
        assert!(
            r.warnings.is_empty(),
            "VML href should not trigger no-anchor warning, got: {:?}",
            r.warnings
        );
    }

    // A namespace-prefixed attribute (xlink:href) counts as a URL attribute.
    #[test]
    fn svg_anchor_xlink_href_anchors_lid() {
        let body = r#"<svg:a xlink:href="https://example.com/svg/path/?lid={{x | lid: 'ai8kexrxcp03'}}"><svg:rect/></svg:a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 1);
        match &r.entries[0] {
            DetectedEntry::Lid { key, url, .. } => {
                assert_eq!(key, "path");
                assert_eq!(url.as_deref(), Some("https://example.com/svg/path/"));
            }
            _ => panic!("expected Lid"),
        }
    }

    // Key de-duplication applies across different element types.
    #[test]
    fn vml_then_anchor_to_same_url_dedupes_with_suffix() {
        let body = r#"
<v:roundrect href="https://example.com/promo/?lid={{x | lid: 'aaaaaaaa1111'}}"></v:roundrect>
<a href="https://example.com/promo/?lid={{x | lid: 'bbbbbbbb2222'}}">label</a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 2);
        let keys: Vec<&str> = r.entries.iter().map(DetectedEntry::key).collect();
        assert_eq!(keys, ["promo", "promo_2"]);
        assert!(r.warnings.is_empty(), "no warnings expected");
    }

    // data-action / data-href are not URL attributes, so no anchor is found.
    #[test]
    fn data_prefixed_attrs_are_not_treated_as_url_anchor() {
        let body = r#"<button data-action="track" data-href="ignored">{{x | lid: 'ulab324mjv2a'}}</button>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 1);
        match &r.entries[0] {
            DetectedEntry::Lid { key, url, value } => {
                assert_eq!(value, "ulab324mjv2a");
                assert!(
                    url.is_none(),
                    "data-* attributes must not be treated as URL anchors, got url={url:?}"
                );
                assert!(
                    key.starts_with("link_"),
                    "expected sequential link_ fallback, got key={key}"
                );
            }
            _ => panic!("expected Lid"),
        }
    }

    // When the lid is inside an open tag without a URL attribute there is no
    // fallback to an earlier <a href>.
    #[test]
    fn lid_inside_non_url_attr_does_not_inherit_prior_anchor_href() {
        let body = r#"<a href="https://example.com/promo/">prev</a><custom data-x="{{x | lid: 'abcd0000zzzz'}}"></custom>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 1);
        match &r.entries[0] {
            DetectedEntry::Lid { key, url, value } => {
                assert_eq!(value, "abcd0000zzzz");
                assert!(
                    url.is_none(),
                    "lid inside a non-URL attribute must not inherit a prior <a href>, got url={url:?}"
                );
                assert!(
                    key.starts_with("link_"),
                    "expected sequential link_ fallback, got key={key}"
                );
            }
            _ => panic!("expected Lid"),
        }
    }

    // Direct check of the path-tail helper, including host-only URLs.
    #[test]
    fn url_path_tail_uses_last_nonempty_segment() {
        assert_eq!(
            url_path_tail("https://example.com/promo/spring-sale"),
            "spring-sale"
        );
        assert_eq!(url_path_tail("https://example.com/"), "");
        assert_eq!(url_path_tail("https://example.com"), "");
    }
}
638}