1use regex_lite::Regex;
9use std::collections::BTreeMap;
10use std::sync::OnceLock;
11
12use crate::values::correlation::{normalize_url, slug_for_cb_id, slug_for_lid};
13
/// Kind of template field whose body is handed to [`templatize_body`].
///
/// The kind selects which URL-anchor lookup (HTML `href`, plaintext URL, or
/// none) is used when a detected `lid` is given a key.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FieldKind {
    /// Content block body; lids are anchored to a preceding HTML `href`.
    ContentBlock,
    /// HTML email body; lids are anchored to a preceding HTML `href`.
    EmailHtmlBody,
    /// Plaintext email body; lids are anchored to a preceding bare URL.
    EmailPlainBody,
    /// Email subject; no URL anchor lookup is performed.
    EmailSubject,
    /// Email preheader; no URL anchor lookup is performed.
    EmailPreheader,
}

impl FieldKind {
    /// Returns `true` when lids in this field are named after the nearest
    /// preceding `<a href=…>` value.
    pub fn supports_html_anchor(self) -> bool {
        match self {
            Self::ContentBlock | Self::EmailHtmlBody => true,
            Self::EmailPlainBody | Self::EmailSubject | Self::EmailPreheader => false,
        }
    }

    /// Returns `true` when lids in this field are named after the nearest
    /// preceding plaintext `http(s)://` URL.
    pub fn supports_plaintext_anchor(self) -> bool {
        self == Self::EmailPlainBody
    }
}
35
/// One raw identifier found in a body and replaced by a placeholder.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DetectedEntry {
    /// A `| lid: '…'` filter argument.
    Lid {
        /// Key used inside the `__BRAZESYNC.lid.<key>__` placeholder.
        key: String,
        /// The raw lid value that was captured from the body.
        value: String,
        /// Normalized URL of the anchor preceding the lid, when one was found.
        url: Option<String>,
    },
    /// A `{{content_blocks.${NAME} | id: 'cbN'}}` include.
    CbId {
        /// Key used inside the `__BRAZESYNC.cb_id.<key>__` placeholder.
        key: String,
        /// The raw `cbN` id that was captured from the body.
        value: String,
        /// The `NAME` captured from `${NAME}`.
        name: String,
    },
}

impl DetectedEntry {
    /// Returns the placeholder key, whichever variant this entry is.
    pub fn key(&self) -> &str {
        // Both variants carry a `key`, so the or-pattern is irrefutable.
        let (Self::Lid { key, .. } | Self::CbId { key, .. }) = self;
        key
    }
}
64
65#[derive(Debug, Clone)]
67pub struct TemplatizedField {
68 pub new_body: String,
69 pub entries: Vec<DetectedEntry>,
70 pub warnings: Vec<String>,
73}
74
75pub fn templatize_body(body: &str, field: FieldKind) -> TemplatizedField {
85 let mut spans: Vec<DetectionSpan> = Vec::new();
86 let mut used_lid_keys: BTreeMap<String, usize> = BTreeMap::new();
89 let mut used_cb_id_keys: BTreeMap<String, usize> = BTreeMap::new();
90 let mut cb_id_name_to_key: BTreeMap<String, String> = BTreeMap::new();
95 let mut warnings: Vec<String> = Vec::new();
96
97 for m in lid_match_re().captures_iter(body) {
99 let whole = m.get(0).expect("group 0 always present");
100 let value = m
101 .get(1)
102 .or(m.get(2))
103 .map(|g| g.as_str().to_string())
104 .expect("one of the value alternates matches");
105
106 let (url, key) = name_lid_for_field(body, whole.start(), field, &mut used_lid_keys);
107 if url.is_none() && !matches!(field, FieldKind::EmailSubject | FieldKind::EmailPreheader) {
108 warnings.push(format!(
109 "lid '{value}' at byte {} has no URL anchor; using sequential key '{key}'",
110 whole.start()
111 ));
112 }
113 if matches!(field, FieldKind::EmailSubject | FieldKind::EmailPreheader) {
114 warnings.push(format!(
120 "lid '{value}' detected in subject/preheader (key '{key}'); \
121 `export` does not refresh these — non-canonical env \
122 values files must be edited manually"
123 ));
124 }
125 spans.push(DetectionSpan {
126 range: whole.range(),
127 replacement: format!("| lid: '__BRAZESYNC.lid.{key}__'"),
128 entry: DetectedEntry::Lid { key, value, url },
129 });
130 }
131
132 for m in cb_id_match_re().captures_iter(body) {
134 let whole = m.get(0).expect("group 0 always present");
135 let name = m.get(1).expect("name capture present").as_str().to_string();
136 let value = m
137 .get(2)
138 .or(m.get(3))
139 .map(|g| g.as_str().to_string())
140 .expect("cbN capture present");
141 let key = match cb_id_name_to_key.get(&name) {
144 Some(prior) => prior.clone(),
145 None => {
146 let k = unique_key(slug_for_cb_id(&name), &mut used_cb_id_keys);
147 cb_id_name_to_key.insert(name.clone(), k.clone());
148 k
149 }
150 };
151 let replacement =
154 format!("{{{{content_blocks.${{{name}}} | id: '__BRAZESYNC.cb_id.{key}__'}}}}");
155 spans.push(DetectionSpan {
156 range: whole.range(),
157 replacement,
158 entry: DetectedEntry::CbId { key, value, name },
159 });
160 }
161
162 spans.sort_by_key(|s| s.range.start);
164 let mut new_body = body.to_string();
165 let mut entries_in_order: Vec<DetectedEntry> = Vec::with_capacity(spans.len());
166 for s in &spans {
167 entries_in_order.push(s.entry.clone());
168 }
169 for s in spans.into_iter().rev() {
170 new_body.replace_range(s.range, &s.replacement);
171 }
172
173 TemplatizedField {
174 new_body,
175 entries: entries_in_order,
176 warnings,
177 }
178}
179
180struct DetectionSpan {
181 range: std::ops::Range<usize>,
182 replacement: String,
183 entry: DetectedEntry,
184}
185
186fn lid_match_re() -> &'static Regex {
187 static RE: OnceLock<Regex> = OnceLock::new();
188 RE.get_or_init(|| {
189 Regex::new(r#"\|\s*lid:\s*(?:"([a-z0-9]{8,})"|'([a-z0-9]{8,})')"#)
191 .expect("lid match regex is valid")
192 })
193}
194
195fn cb_id_match_re() -> &'static Regex {
196 static RE: OnceLock<Regex> = OnceLock::new();
197 RE.get_or_init(|| {
198 Regex::new(
199 r#"\{\{\s*content_blocks\.\$\{\s*([^\s}|]+)\s*\}\s*\|\s*id:\s*(?:"(cb[0-9]+)"|'(cb[0-9]+)')\s*\}\}"#,
200 )
201 .expect("cb_id match regex is valid")
202 })
203}
204
205fn href_re() -> &'static Regex {
206 static RE: OnceLock<Regex> = OnceLock::new();
207 RE.get_or_init(|| {
208 Regex::new(r#"(?i)<a\b[^>]*?\bhref\s*=\s*(?:"([^"]*)"|'([^']*)')"#)
209 .expect("href regex is valid")
210 })
211}
212
213fn plaintext_url_re() -> &'static Regex {
214 static RE: OnceLock<Regex> = OnceLock::new();
215 RE.get_or_init(|| Regex::new(r#"https?://[^\s<>"']+"#).expect("plaintext URL regex is valid"))
216}
217
218fn name_lid_for_field(
219 body: &str,
220 lid_token_offset: usize,
221 field: FieldKind,
222 used: &mut BTreeMap<String, usize>,
223) -> (Option<String>, String) {
224 let url = preceding_url(body, lid_token_offset, field);
225 let key_source: String = match &url {
226 Some(u) => url_path_tail(u).to_string(),
227 None => String::new(),
228 };
229 let slug = slug_for_lid(&key_source);
230 let key = unique_key(slug, used);
231 (url, key)
232}
233
234fn preceding_url(body: &str, lid_token_offset: usize, field: FieldKind) -> Option<String> {
235 let prefix = &body[..lid_token_offset];
236 let raw = if field.supports_html_anchor() {
237 href_re()
239 .captures_iter(prefix)
240 .last()
241 .and_then(|cap| cap.get(1).or(cap.get(2)))
242 .map(|m| m.as_str().to_string())
243 } else if field.supports_plaintext_anchor() {
244 plaintext_url_re()
245 .find_iter(prefix)
246 .last()
247 .map(|m| m.as_str().to_string())
248 } else {
249 None
250 };
251 raw.map(|r| normalize_url(&r))
252}
253
/// Returns the last non-empty `/`-separated segment of the path part of
/// `url`, or an empty string when the URL has no path segments.
///
/// A leading `scheme://` is dropped first; the path is whatever follows the
/// first `/` after that.
fn url_path_tail(url: &str) -> String {
    let without_scheme = match url.split_once("://") {
        Some((_, rest)) => rest,
        None => url,
    };
    // Host and path are separated by the first '/'; no '/' means no path.
    let path = without_scheme.split_once('/').map_or("", |(_, p)| p);
    path.split('/')
        .rfind(|segment| !segment.is_empty())
        .unwrap_or_default()
        .to_owned()
}
270
/// Returns a key derived from `base` that has not been returned for this
/// `used` map before.
///
/// The first request for a base yields the base itself; later requests yield
/// `{base}_{n}` with `n` counting up from 2.
///
/// `used` records, for every base, the highest counter consumed so far.
/// Every suffixed key that is handed out is recorded as well, so that a
/// suffixed key can never coincide with a literal base of the same spelling
/// (e.g. bases `cta`, `cta`, `cta_2` yield `cta`, `cta_2`, `cta_2_2`), and a
/// suffix already taken by an earlier base is skipped (e.g. `promo_2`,
/// `promo`, `promo` yield `promo_2`, `promo`, `promo_3`).
fn unique_key(base: String, used: &mut BTreeMap<String, usize>) -> String {
    let mut count = used.get(&base).copied().unwrap_or(0) + 1;
    if count == 1 {
        // Never seen as a base nor issued as a suffixed key: use it verbatim.
        used.insert(base.clone(), 1);
        return base;
    }

    let mut candidate = format!("{base}_{count}");
    // Skip suffixes that are already taken, either as a literal base or as a
    // previously issued suffixed key.
    while used.contains_key(&candidate) {
        count += 1;
        candidate = format!("{base}_{count}");
    }

    used.insert(base, count);
    // Reserve the issued key so a later identical base gets its own suffix.
    used.insert(candidate.clone(), 1);
    candidate
}
280
#[cfg(test)]
mod tests {
    use super::*;

    // A body that only mentions a placeholder (no raw lid) is left untouched.
    #[test]
    fn idempotent_on_already_templatized_body() {
        let body = "<p>__BRAZESYNC.lid.cta__ kept verbatim</p>";
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.new_body, body);
        assert!(r.entries.is_empty());
    }

    // The key of an HTML lid comes from the tail of the preceding href.
    #[test]
    fn rewrites_html_lid_with_url_anchor() {
        let body = r#"<a href="https://example.com/spring-sale">{{x | lid: 'ai8kexrxcp03'}}</a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert!(r.new_body.contains("__BRAZESYNC.lid.spring_sale__"));
        assert_eq!(r.entries.len(), 1);
        let DetectedEntry::Lid { key, value, url } = &r.entries[0] else {
            panic!("expected Lid");
        };
        assert_eq!(key, "spring_sale");
        assert_eq!(value, "ai8kexrxcp03");
        assert_eq!(url.as_deref(), Some("https://example.com/spring-sale"));
    }

    // The include keeps its `${NAME}` and only the id is replaced.
    #[test]
    fn rewrites_cb_id_include() {
        let body = "{{content_blocks.${promo_banner} | id: 'cb42'}}";
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert!(r.new_body.contains("__BRAZESYNC.cb_id.promo_banner__"));
        assert!(r.new_body.contains("${promo_banner}"));
        assert_eq!(r.entries.len(), 1);
    }

    // Two lids anchored to the same URL get distinct keys.
    #[test]
    fn dedupes_duplicate_url_with_sequential_suffix() {
        let body = r#"
<a href="https://example.com/cta">{{x | lid: 'ai8kexrxcp03'}}A</a>
<a href="https://example.com/cta">{{x | lid: 'bj9lfsysxq14'}}B</a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        let keys: Vec<&str> = r.entries.iter().map(|entry| entry.key()).collect();
        assert_eq!(keys, ["cta", "cta_2"]);
    }

    // Plaintext bodies anchor a lid to the bare URL before it.
    #[test]
    fn plaintext_url_anchor_works() {
        let body = "Click https://example.com/promo {{x | lid: 'ai8kexrxcp03'}} now.";
        let r = templatize_body(body, FieldKind::EmailPlainBody);
        let DetectedEntry::Lid { key, url, .. } = &r.entries[0] else {
            panic!();
        };
        assert_eq!(key, "promo");
        assert_eq!(url.as_deref(), Some("https://example.com/promo"));
    }

    // Subject lids have no anchor and carry the manual-maintenance warning.
    #[test]
    fn subject_lid_warns_about_export_refresh_gap() {
        let body = "Hello {{x | lid: 'ai8kexrxcp03'}} world";
        let r = templatize_body(body, FieldKind::EmailSubject);
        let has_export_warning = r
            .warnings
            .iter()
            .any(|w| w.contains("export") && w.contains("subject"));
        assert!(
            has_export_warning,
            "expected manual-maintenance warning, got: {:?}",
            r.warnings
        );
        let DetectedEntry::Lid { key, url, .. } = &r.entries[0] else {
            panic!();
        };
        assert_eq!(key, "link_");
        assert!(url.is_none());
    }

    // Both occurrences are reported, and they share one key.
    #[test]
    fn repeated_cb_id_name_reuses_key() {
        let body = "{{content_blocks.${promo} | id: 'cb10'}} ... \
                    {{content_blocks.${promo} | id: 'cb10'}}";
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 2, "both occurrences detected");
        assert_eq!(r.entries[0].key(), "promo");
        assert_eq!(
            r.entries[1].key(),
            "promo",
            "same ${{NAME}} must reuse the key"
        );
    }

    // An existing placeholder is skipped; the remaining raw lid is detected.
    #[test]
    fn partially_templatized_body_picks_up_remaining_raw_lid() {
        let body = r#"
<a href="https://example.com/cta">{{ x | lid: '__BRAZESYNC.lid.cta__' }}A</a>
<a href="https://example.com/promo">{{ x | lid: 'rawvalue1234' }}B</a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 1, "the raw lid must be detected");
        let DetectedEntry::Lid { key, value, .. } = &r.entries[0] else {
            panic!("expected Lid");
        };
        assert_eq!(key, "promo");
        assert_eq!(value, "rawvalue1234");
    }

    // An HTML lid with no preceding href is still detected, with a warning.
    #[test]
    fn html_lid_without_anchor_warns() {
        let body = "{{x | lid: 'ai8kexrxcp03'}} just floating";
        let r = templatize_body(body, FieldKind::EmailHtmlBody);
        assert_eq!(r.entries.len(), 1);
        assert!(!r.warnings.is_empty());
    }

    // Trailing slashes and path-less URLs yield an empty tail.
    #[test]
    fn url_path_tail_uses_last_nonempty_segment() {
        assert_eq!(
            url_path_tail("https://example.com/promo/spring-sale"),
            "spring-sale"
        );
        assert_eq!(url_path_tail("https://example.com/"), "");
        assert_eq!(url_path_tail("https://example.com"), "");
    }
}