Skip to main content

braze_sync/values/
templatize.rs

//! Migration pass: raw-lid / raw-cb_id bodies → templated bodies + values.
//!
//! Powers `braze-sync templatize` (RFC §2.7). All functions in this
//! module are pure — they take a body string + field kind, and return
//! the rewritten body together with the per-occurrence detection
//! metadata the CLI orchestrator uses to populate values files.
7
8use regex_lite::Regex;
9use std::collections::BTreeMap;
10use std::sync::OnceLock;
11
12use crate::values::correlation::{normalize_url, slug_for_cb_id, slug_for_lid};
13
/// Which Liquid context the body belongs to. Determines:
/// - what kind of URL anchor lid detection should look for (HTML vs raw)
/// - whether lid detection without a URL anchor should produce a
///   sequential `link_N` key (deferred for subject/preheader v0.14)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FieldKind {
    /// Content block body; lids are anchored on a preceding `<a href>`.
    ContentBlock,
    /// HTML email body; lids are anchored on a preceding `<a href>`.
    EmailHtmlBody,
    /// Plain-text email body; lids are anchored on a preceding bare URL.
    EmailPlainBody,
    /// Email subject line; no URL anchor is looked up.
    EmailSubject,
    /// Email preheader; no URL anchor is looked up.
    EmailPreheader,
}

impl FieldKind {
    /// Returns `true` when lid detection should anchor on the nearest
    /// preceding HTML `<a href="...">` attribute.
    pub fn supports_html_anchor(self) -> bool {
        matches!(self, Self::ContentBlock | Self::EmailHtmlBody)
    }

    /// Returns `true` when lid detection should anchor on the nearest
    /// preceding bare `http(s)://` URL in the text.
    pub fn supports_plaintext_anchor(self) -> bool {
        matches!(self, Self::EmailPlainBody)
    }
}
35
/// One placeholder produced by templatization, with the metadata the
/// caller needs to update `values/<env>.yaml`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DetectedEntry {
    /// A `| lid: '<value>'` occurrence.
    Lid {
        /// Placeholder key written into the body.
        key: String,
        /// The raw lid literal that was found in the body.
        value: String,
        /// Normalized URL anchor, when this field has one. `None` for
        /// subject/preheader where lid auto-detection currently falls
        /// back to a sequential `link_N` key (no URL to anchor on).
        url: Option<String>,
    },
    /// A `{{content_blocks.${NAME} | id: 'cbN'}}` occurrence.
    CbId {
        /// Placeholder key written into the body.
        key: String,
        /// The raw `cbN` literal that was found in the body.
        value: String,
        /// The original Liquid `${NAME}` identifier; recorded for
        /// debugging and so the slug round-trips back.
        name: String,
    },
}

impl DetectedEntry {
    /// Returns the placeholder key, regardless of the entry kind.
    pub fn key(&self) -> &str {
        match self {
            Self::Lid { key, .. } | Self::CbId { key, .. } => key,
        }
    }
}
64
65/// Result of templatizing one body field.
66#[derive(Debug, Clone)]
67pub struct TemplatizedField {
68    pub new_body: String,
69    pub entries: Vec<DetectedEntry>,
70    /// Warnings the CLI should surface (e.g. lid in subject/preheader
71    /// where we don't have a robust anchor).
72    pub warnings: Vec<String>,
73}
74
75/// Detect every `| lid: '<value>'` and `{{content_blocks.${NAME} | id: 'cbN'}}`
76/// in `body`, rewrite to `__BRAZESYNC.<type>.<key>__` placeholders,
77/// and return the rewritten body together with the per-occurrence
78/// detection metadata. Idempotent by construction: detection regexes
79/// require raw lid (`[a-z0-9]{8,}`) / cb_id (`cb[0-9]+`) literals, so
80/// already-templated `__BRAZESYNC.*__` placeholders never re-match.
81/// This means a partially-templatized body (existing placeholders
82/// alongside remaining raw values) still gets the raw values picked up,
83/// instead of being silently skipped.
84pub fn templatize_body(body: &str, field: FieldKind) -> TemplatizedField {
85    let mut spans: Vec<DetectionSpan> = Vec::new();
86    // Order matters per RFC §3 Q3 connumber fallback: detect lids in
87    // appearance order, dedup keys by sequential suffix.
88    let mut used_lid_keys: BTreeMap<String, usize> = BTreeMap::new();
89    let mut used_cb_id_keys: BTreeMap<String, usize> = BTreeMap::new();
90    // Repeated `${NAME}` cb_id references must reuse the same key so
91    // export refresh (which correlates by NAME) can match every
92    // occurrence. Without this, the second `${promo}` would slug to
93    // `promo_2` and refresh would never find a remote match.
94    let mut cb_id_name_to_key: BTreeMap<String, String> = BTreeMap::new();
95    let mut warnings: Vec<String> = Vec::new();
96
97    // --- lid detection ---
98    for m in lid_match_re().captures_iter(body) {
99        let whole = m.get(0).expect("group 0 always present");
100        let value = m
101            .get(1)
102            .or(m.get(2))
103            .map(|g| g.as_str().to_string())
104            .expect("one of the value alternates matches");
105
106        let (url, key) = name_lid_for_field(body, whole.start(), field, &mut used_lid_keys);
107        if url.is_none() && !matches!(field, FieldKind::EmailSubject | FieldKind::EmailPreheader) {
108            warnings.push(format!(
109                "lid '{value}' at byte {} has no URL anchor; using sequential key '{key}'",
110                whole.start()
111            ));
112        }
113        if matches!(field, FieldKind::EmailSubject | FieldKind::EmailPreheader) {
114            // Phase 3 export does NOT refresh subject/preheader lid
115            // entries (see exporter.rs refresh path). Skeleton files
116            // produced for other envs will therefore stay `value: null`
117            // until manually edited. Surface this once per detection so
118            // the operator knows the canonical/skeleton gap exists.
119            warnings.push(format!(
120                "lid '{value}' detected in subject/preheader (key '{key}'); \
121                 `export` does not refresh these — non-canonical env \
122                 values files must be edited manually"
123            ));
124        }
125        spans.push(DetectionSpan {
126            range: whole.range(),
127            replacement: format!("| lid: '__BRAZESYNC.lid.{key}__'"),
128            entry: DetectedEntry::Lid { key, value, url },
129        });
130    }
131
132    // --- cb_id detection ---
133    for m in cb_id_match_re().captures_iter(body) {
134        let whole = m.get(0).expect("group 0 always present");
135        let name = m.get(1).expect("name capture present").as_str().to_string();
136        let value = m
137            .get(2)
138            .or(m.get(3))
139            .map(|g| g.as_str().to_string())
140            .expect("cbN capture present");
141        // Same `${NAME}` referenced twice in one body → reuse the
142        // first key so export refresh matches every occurrence.
143        let key = match cb_id_name_to_key.get(&name) {
144            Some(prior) => prior.clone(),
145            None => {
146                let k = unique_key(slug_for_cb_id(&name), &mut used_cb_id_keys);
147                cb_id_name_to_key.insert(name.clone(), k.clone());
148                k
149            }
150        };
151        // Preserve the original `${NAME}` form so cb_id correlation in
152        // export keeps working.
153        let replacement =
154            format!("{{{{content_blocks.${{{name}}} | id: '__BRAZESYNC.cb_id.{key}__'}}}}");
155        spans.push(DetectionSpan {
156            range: whole.range(),
157            replacement,
158            entry: DetectedEntry::CbId { key, value, name },
159        });
160    }
161
162    // Apply spans back-to-front so earlier byte offsets remain valid.
163    spans.sort_by_key(|s| s.range.start);
164    let mut new_body = body.to_string();
165    let mut entries_in_order: Vec<DetectedEntry> = Vec::with_capacity(spans.len());
166    for s in &spans {
167        entries_in_order.push(s.entry.clone());
168    }
169    for s in spans.into_iter().rev() {
170        new_body.replace_range(s.range, &s.replacement);
171    }
172
173    TemplatizedField {
174        new_body,
175        entries: entries_in_order,
176        warnings,
177    }
178}
179
180struct DetectionSpan {
181    range: std::ops::Range<usize>,
182    replacement: String,
183    entry: DetectedEntry,
184}
185
186fn lid_match_re() -> &'static Regex {
187    static RE: OnceLock<Regex> = OnceLock::new();
188    RE.get_or_init(|| {
189        // RFC §2.7 step 2: pipe-anchored, dual-quote, min length 8.
190        Regex::new(r#"\|\s*lid:\s*(?:"([a-z0-9]{8,})"|'([a-z0-9]{8,})')"#)
191            .expect("lid match regex is valid")
192    })
193}
194
195fn cb_id_match_re() -> &'static Regex {
196    static RE: OnceLock<Regex> = OnceLock::new();
197    RE.get_or_init(|| {
198        Regex::new(
199            r#"\{\{\s*content_blocks\.\$\{\s*([^\s}|]+)\s*\}\s*\|\s*id:\s*(?:"(cb[0-9]+)"|'(cb[0-9]+)')\s*\}\}"#,
200        )
201        .expect("cb_id match regex is valid")
202    })
203}
204
205fn href_re() -> &'static Regex {
206    static RE: OnceLock<Regex> = OnceLock::new();
207    RE.get_or_init(|| {
208        Regex::new(r#"(?i)<a\b[^>]*?\bhref\s*=\s*(?:"([^"]*)"|'([^']*)')"#)
209            .expect("href regex is valid")
210    })
211}
212
213fn plaintext_url_re() -> &'static Regex {
214    static RE: OnceLock<Regex> = OnceLock::new();
215    RE.get_or_init(|| Regex::new(r#"https?://[^\s<>"']+"#).expect("plaintext URL regex is valid"))
216}
217
218fn name_lid_for_field(
219    body: &str,
220    lid_token_offset: usize,
221    field: FieldKind,
222    used: &mut BTreeMap<String, usize>,
223) -> (Option<String>, String) {
224    let url = preceding_url(body, lid_token_offset, field);
225    let key_source: String = match &url {
226        Some(u) => url_path_tail(u).to_string(),
227        None => String::new(),
228    };
229    let slug = slug_for_lid(&key_source);
230    let key = unique_key(slug, used);
231    (url, key)
232}
233
234fn preceding_url(body: &str, lid_token_offset: usize, field: FieldKind) -> Option<String> {
235    let prefix = &body[..lid_token_offset];
236    let raw = if field.supports_html_anchor() {
237        // Use the LAST href before the lid token.
238        href_re()
239            .captures_iter(prefix)
240            .last()
241            .and_then(|cap| cap.get(1).or(cap.get(2)))
242            .map(|m| m.as_str().to_string())
243    } else if field.supports_plaintext_anchor() {
244        plaintext_url_re()
245            .find_iter(prefix)
246            .last()
247            .map(|m| m.as_str().to_string())
248    } else {
249        None
250    };
251    raw.map(|r| normalize_url(&r))
252}
253
/// Returns the last non-empty path component of `url`.
///
/// The `scheme://` prefix (if any) and the host — everything up to the
/// first `/` after the scheme — are dropped first.
/// `https://example.com/promo/spring-sale` → `spring-sale`;
/// `https://example.com/a/b/` → `b`. A bare host, or a host followed
/// only by slashes, yields the empty string (the caller applies the
/// `link_` fallback via `slug_for_lid`).
fn url_path_tail(url: &str) -> String {
    let after_scheme = url.split_once("://").map_or(url, |(_, rest)| rest);
    // Everything after the first `/` is the path; no `/` means no path.
    let path = after_scheme.split_once('/').map_or("", |(_, path)| path);
    path.rsplit('/')
        .find(|segment| !segment.is_empty())
        .unwrap_or("")
        .to_string()
}
270
/// Returns a key derived from `base` that has not been handed out
/// before, recording it in `used`.
///
/// The first request for a base returns it unchanged; later requests
/// return `{base}_2`, `{base}_3`, … . Every key handed out is itself
/// reserved in `used`, and suffixes that are already taken are skipped,
/// so a generated key such as `cta_2` can never collide with a distinct
/// base that happens to slug to `cta_2` (in either arrival order).
///
/// `used` maps each base/emitted key to the highest suffix tried for it.
fn unique_key(base: String, used: &mut BTreeMap<String, usize>) -> String {
    let Some(seen) = used.get(&base).copied() else {
        // Never seen as a base nor emitted as a suffixed key.
        used.insert(base.clone(), 1);
        return base;
    };

    let mut suffix = seen;
    let key = loop {
        suffix += 1;
        let candidate = format!("{base}_{suffix}");
        if !used.contains_key(&candidate) {
            break candidate;
        }
    };
    used.insert(base, suffix);
    // Reserve the emitted key so a later identical base gets its own suffix.
    used.insert(key.clone(), 1);
    key
}
280
#[cfg(test)]
mod tests {
    //! Unit tests for body templatization: placeholder rewriting, key
    //! naming and de-duplication, and the warnings surfaced to the CLI.

    use super::*;

    #[test]
    fn idempotent_on_already_templatized_body() {
        let body = "<p>__BRAZESYNC.lid.cta__ kept verbatim</p>";
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.new_body, body);
        assert!(r.entries.is_empty());
    }

    #[test]
    fn rewrites_html_lid_with_url_anchor() {
        let body = r#"<a href="https://example.com/spring-sale">{{x | lid: 'ai8kexrxcp03'}}</a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert!(r.new_body.contains("__BRAZESYNC.lid.spring_sale__"));
        assert_eq!(r.entries.len(), 1);
        match &r.entries[0] {
            DetectedEntry::Lid { key, value, url } => {
                assert_eq!(key, "spring_sale");
                assert_eq!(value, "ai8kexrxcp03");
                assert_eq!(url.as_deref(), Some("https://example.com/spring-sale"));
            }
            _ => panic!("expected Lid"),
        }
    }

    #[test]
    fn rewrites_cb_id_include() {
        let body = "{{content_blocks.${promo_banner} | id: 'cb42'}}";
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert!(r.new_body.contains("__BRAZESYNC.cb_id.promo_banner__"));
        // Preserves ${NAME} so export correlation still works.
        assert!(r.new_body.contains("${promo_banner}"));
        assert_eq!(r.entries.len(), 1);
    }

    #[test]
    fn dedupes_duplicate_url_with_sequential_suffix() {
        let body = r#"
<a href="https://example.com/cta">{{x | lid: 'ai8kexrxcp03'}}A</a>
<a href="https://example.com/cta">{{x | lid: 'bj9lfsysxq14'}}B</a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        let keys: Vec<&str> = r.entries.iter().map(DetectedEntry::key).collect();
        assert_eq!(keys, ["cta", "cta_2"]);
    }

    #[test]
    fn plaintext_url_anchor_works() {
        let body = "Click https://example.com/promo {{x | lid: 'ai8kexrxcp03'}} now.";
        let r = templatize_body(body, FieldKind::EmailPlainBody);
        match &r.entries[0] {
            DetectedEntry::Lid { key, url, .. } => {
                assert_eq!(key, "promo");
                assert_eq!(url.as_deref(), Some("https://example.com/promo"));
            }
            _ => panic!(),
        }
    }

    #[test]
    fn subject_lid_warns_about_export_refresh_gap() {
        // subject has no URL anchor — slug falls back to `link_`. The
        // CLI must surface that `export` won't refresh this entry for
        // other envs so the operator knows to maintain values manually.
        let body = "Hello {{x | lid: 'ai8kexrxcp03'}} world";
        let r = templatize_body(body, FieldKind::EmailSubject);
        assert!(
            r.warnings
                .iter()
                .any(|w| w.contains("export") && w.contains("subject")),
            "expected manual-maintenance warning, got: {:?}",
            r.warnings
        );
        match &r.entries[0] {
            DetectedEntry::Lid { key, url, .. } => {
                assert_eq!(key, "link_");
                assert!(url.is_none());
            }
            _ => panic!(),
        }
    }

    #[test]
    fn repeated_cb_id_name_reuses_key() {
        // RFC: same `${NAME}` resolves to the same content_block. The
        // values file must have ONE entry for it, not `name` + `name_2`,
        // otherwise export refresh can never populate the duplicates.
        let body = "{{content_blocks.${promo} | id: 'cb10'}} ... \
                    {{content_blocks.${promo} | id: 'cb10'}}";
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 2, "both occurrences detected");
        assert_eq!(r.entries[0].key(), "promo");
        assert_eq!(
            r.entries[1].key(),
            "promo",
            "same ${{NAME}} must reuse the key"
        );
    }

    #[test]
    fn partially_templatized_body_picks_up_remaining_raw_lid() {
        // Mixed state: one lid already templated, another still raw.
        // The raw one MUST be detected (no early-return short-circuit).
        let body = r#"
<a href="https://example.com/cta">{{ x | lid: '__BRAZESYNC.lid.cta__' }}A</a>
<a href="https://example.com/promo">{{ x | lid: 'rawvalue1234' }}B</a>"#;
        let r = templatize_body(body, FieldKind::ContentBlock);
        assert_eq!(r.entries.len(), 1, "the raw lid must be detected");
        match &r.entries[0] {
            DetectedEntry::Lid { key, value, .. } => {
                assert_eq!(key, "promo");
                assert_eq!(value, "rawvalue1234");
            }
            _ => panic!("expected Lid"),
        }
    }

    #[test]
    fn html_lid_without_anchor_warns() {
        // HTML body but the lid has no preceding <a href> — RFC says
        // this should still produce a key but flag it for the operator.
        let body = "{{x | lid: 'ai8kexrxcp03'}} just floating";
        let r = templatize_body(body, FieldKind::EmailHtmlBody);
        assert_eq!(r.entries.len(), 1);
        assert!(!r.warnings.is_empty());
    }

    #[test]
    fn url_path_tail_uses_last_nonempty_segment() {
        assert_eq!(
            url_path_tail("https://example.com/promo/spring-sale"),
            "spring-sale"
        );
        assert_eq!(url_path_tail("https://example.com/"), "");
        assert_eq!(url_path_tail("https://example.com"), "");
    }
}