Skip to main content

braze_sync/values/
placeholder.rs

//! Placeholder extraction and resolution for `__BRAZESYNC.<type>.<key>__`.
//!
//! v0.15 model: only Braze-managed types (`lid`, `cb_id`) are
//! recognized. Both resolve at apply/diff time from the live remote
//! body via URL / `${NAME}` anchor correlation (see
//! [`crate::values::braze_managed`]).
//!
//! Syntax:
//!   - Double-underscore envelope: `__BRAZESYNC.…__`
//!   - Dot-separated namespace inside the envelope: `<type>.<key>`
//!   - `<type>` ∈ {`lid`, `cb_id`}
//!   - `<key>` matches `^[a-z][a-z0-9_]*$`
13
14use regex_lite::Regex;
15use std::collections::BTreeMap;
16use std::sync::OnceLock;
17
/// The placeholder namespaces the strict parser recognizes, i.e. the
/// `<type>` segment of `__BRAZESYNC.<type>.<key>__`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PlaceholderType {
    /// Spelled `lid` inside the envelope.
    Lid,
    /// Spelled `cb_id` inside the envelope.
    CbId,
}

impl PlaceholderType {
    /// Returns the tag that spells this type inside a placeholder token.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Lid => "lid",
            Self::CbId => "cb_id",
        }
    }

    /// Inverse of [`Self::as_str`]: maps a tag back to its variant.
    ///
    /// The comparison is exact (case-sensitive); any other input yields
    /// `None`.
    fn parse(s: &str) -> Option<Self> {
        let ty = match s {
            "lid" => Self::Lid,
            "cb_id" => Self::CbId,
            _ => return None,
        };
        Some(ty)
    }
}
40
41/// One placeholder occurrence within a body string.
42#[derive(Debug, Clone, PartialEq, Eq)]
43pub struct Placeholder {
44    pub ty: PlaceholderType,
45    pub key: String,
46    /// Byte offset where the literal `__BRAZESYNC.…__` token begins.
47    pub start: usize,
48    /// Byte offset (exclusive) where it ends.
49    pub end: usize,
50}
51
52impl Placeholder {
53    /// The textual form, useful for error messages: `__BRAZESYNC.lid.foo__`.
54    pub fn literal(&self) -> String {
55        format!("__BRAZESYNC.{}.{}__", self.ty.as_str(), self.key)
56    }
57}
58
/// Opening of the strict envelope, up to and including the first dot.
const PREFIX: &str = "__BRAZESYNC.";
/// Closing delimiter of the envelope.
const CLOSE: &str = "__";
61
62fn key_re() -> &'static Regex {
63    static RE: OnceLock<Regex> = OnceLock::new();
64    RE.get_or_init(|| Regex::new(r"^[a-z][a-z0-9_]*$").expect("key regex is valid"))
65}
66
67/// Loose envelope-only regex. Catches typos like `__BRAZSYNC.…__` or
68/// unknown / retired types (`__BRAZESYNC.url.foo__`,
69/// `__BRAZESYNC.custom.foo__`) so they surface as warnings rather than
70/// silently passing through. Inner classes allow `_` so typo-shaped
71/// placeholders whose key contains an underscore are still caught.
72fn loose_re() -> &'static Regex {
73    static RE: OnceLock<Regex> = OnceLock::new();
74    RE.get_or_init(|| {
75        Regex::new(r"__BRAZE?SYNC\.[A-Za-z0-9_]+\.[A-Za-z0-9_]+__")
76            .expect("loose placeholder regex is valid")
77    })
78}
79
80pub fn extract_placeholders(body: &str) -> Vec<Placeholder> {
81    let mut out = Vec::new();
82    let mut i = 0;
83    while i + PREFIX.len() <= body.len() {
84        let Some(rel) = body[i..].find(PREFIX) else {
85            break;
86        };
87        let start = i + rel;
88        let inner_start = start + PREFIX.len();
89        let Some(rel_close) = body[inner_start..].find(CLOSE) else {
90            break;
91        };
92        let close_start = inner_start + rel_close;
93        let end = close_start + CLOSE.len();
94        let inner = &body[inner_start..close_start];
95        if let Some((ty_str, key)) = inner.split_once('.') {
96            if let (Some(ty), true) = (PlaceholderType::parse(ty_str), key_re().is_match(key)) {
97                out.push(Placeholder {
98                    ty,
99                    key: key.to_string(),
100                    start,
101                    end,
102                });
103                i = end;
104                continue;
105            }
106        }
107        // Not a valid strict placeholder; skip past the opening `__` so the
108        // remainder is still scanned (and may be surfaced by the loose pass).
109        i = start + CLOSE.len();
110    }
111    out
112}
113
114/// Find loose envelope matches that don't satisfy the strict pattern.
115pub fn find_suspicious_placeholders(body: &str) -> Vec<String> {
116    let strict_spans: Vec<(usize, usize)> = extract_placeholders(body)
117        .into_iter()
118        .map(|p| (p.start, p.end))
119        .collect();
120    loose_re()
121        .find_iter(body)
122        .filter(|m| {
123            // Overlap (not equality) — the greedy loose regex can extend past
124            // a valid envelope into adjacent `__...__` text. See regression
125            // tests below.
126            !strict_spans
127                .iter()
128                .any(|&(s, e)| m.start() < e && s < m.end())
129        })
130        .map(|m| m.as_str().to_string())
131        .collect()
132}
133
134#[derive(Debug, Clone, PartialEq, Eq)]
135pub enum ResolutionError {
136    UnknownKey {
137        ty: PlaceholderType,
138        key: String,
139        start: usize,
140    },
141    /// lid is per-click-context — reuse of the same key is an error.
142    DuplicateLidKey {
143        key: String,
144        occurrences: Vec<usize>,
145    },
146    /// Token uses the `__BRAZESYNC.` prefix but with a retired namespace
147    /// (e.g. `custom`, `global`) that is no longer resolved.
148    RetiredNamespace { token: String },
149}
150
151pub type LookupKey = (PlaceholderType, String);
152
153/// Resolve every placeholder in `body` against `lookup`. Returns the
154/// resolved body on success, or every unresolved placeholder on failure.
155pub fn resolve_placeholders(
156    body: &str,
157    lookup: &BTreeMap<LookupKey, String>,
158) -> Result<String, Vec<ResolutionError>> {
159    let placeholders = extract_placeholders(body);
160    let mut errors = Vec::new();
161
162    let mut lid_occurrences: BTreeMap<String, Vec<usize>> = BTreeMap::new();
163    for ph in &placeholders {
164        if matches!(ph.ty, PlaceholderType::Lid) {
165            lid_occurrences
166                .entry(ph.key.clone())
167                .or_default()
168                .push(ph.start);
169        }
170    }
171    for (key, occurrences) in lid_occurrences {
172        if occurrences.len() > 1 {
173            errors.push(ResolutionError::DuplicateLidKey { key, occurrences });
174        }
175    }
176
177    for ph in &placeholders {
178        let key: LookupKey = (ph.ty, ph.key.clone());
179        if !lookup.contains_key(&key) {
180            errors.push(ResolutionError::UnknownKey {
181                ty: ph.ty,
182                key: ph.key.clone(),
183                start: ph.start,
184            });
185        }
186    }
187
188    if !errors.is_empty() {
189        return Err(errors);
190    }
191
192    // Substitute back-to-front so byte offsets remain stable across edits.
193    let mut out = body.to_string();
194    for ph in placeholders.iter().rev() {
195        let key: LookupKey = (ph.ty, ph.key.clone());
196        let value = lookup
197            .get(&key)
198            .expect("missing key would have been caught above");
199        out.replace_range(ph.start..ph.end, value);
200    }
201    Ok(out)
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a resolution table from `(type, key, value)` triples.
    fn lookup(pairs: &[(PlaceholderType, &str, &str)]) -> BTreeMap<LookupKey, String> {
        let mut table = BTreeMap::new();
        for &(ty, key, value) in pairs {
            table.insert((ty, key.to_string()), value.to_string());
        }
        table
    }

    /// Keys of the extracted placeholders, in extraction order.
    fn keys_of(found: &[Placeholder]) -> Vec<&str> {
        found.iter().map(|p| p.key.as_str()).collect()
    }

    #[test]
    fn extracts_strict_placeholders_in_order() {
        let body = "head __BRAZESYNC.lid.spring_sale__ mid __BRAZESYNC.cb_id.cb_hero__ tail";
        let found = extract_placeholders(body);
        let types: Vec<PlaceholderType> = found.iter().map(|p| p.ty).collect();
        assert_eq!(types, [PlaceholderType::Lid, PlaceholderType::CbId]);
        assert_eq!(keys_of(&found), ["spring_sale", "cb_hero"]);
        assert!(found[0].start < found[1].start);
    }

    #[test]
    fn rejects_unknown_type_in_strict_pass() {
        let found = extract_placeholders("x __BRAZESYNC.url.foo__ y");
        assert!(found.is_empty());
    }

    #[test]
    fn rejects_uppercase_key_in_strict_pass() {
        let found = extract_placeholders("x __BRAZESYNC.lid.Foo__ y");
        assert!(found.is_empty());
    }

    #[test]
    fn rejects_digit_leading_key_in_strict_pass() {
        let found = extract_placeholders("x __BRAZESYNC.lid.1foo__ y");
        assert!(found.is_empty());
    }

    #[test]
    fn suspicious_picks_up_typos_and_unknown_types() {
        let warnings =
            find_suspicious_placeholders("x __BRAZSYNC.lid.foo__ y __BRAZESYNC.url.bar__ z");
        assert_eq!(warnings.len(), 2);
        assert!(warnings.iter().any(|w| w.contains("BRAZSYNC")));
        assert!(warnings.iter().any(|w| w.contains(".url.")));
    }

    #[test]
    fn suspicious_excludes_strict_matches() {
        let warnings = find_suspicious_placeholders("__BRAZESYNC.lid.ok__");
        assert!(warnings.is_empty());
    }

    #[test]
    fn suspicious_ignores_trailing_double_underscore_text() {
        // Regression: the greedy loose regex runs past a valid placeholder
        // into `__bold__`-style adjacent text, yielding a span like (0, 26)
        // for `__BRAZESYNC.lid.foo__bar__`. That match begins on the real
        // strict placeholder (0, 21), so it must not be surfaced.
        let warnings = find_suspicious_placeholders("__BRAZESYNC.lid.foo__bar__");
        assert!(warnings.is_empty());
    }

    #[test]
    fn suspicious_ignores_adjacent_placeholders_sharing_underscores() {
        // Regression: two strict placeholders written back to back produce
        // one loose match that begins on the first strict placeholder and
        // reaches into the second. Both are already covered — no warning.
        let warnings = find_suspicious_placeholders("__BRAZESYNC.lid.foo____BRAZESYNC.lid.bar__");
        assert!(warnings.is_empty());
    }

    #[test]
    fn resolves_when_all_keys_present() {
        let table = lookup(&[
            (PlaceholderType::Lid, "cta", "ai8kexrxcp03"),
            (PlaceholderType::CbId, "shared", "cb42"),
        ]);
        let resolved = resolve_placeholders(
            "before __BRAZESYNC.lid.cta__ middle __BRAZESYNC.cb_id.shared__ end",
            &table,
        );
        assert_eq!(
            resolved,
            Ok("before ai8kexrxcp03 middle cb42 end".to_string())
        );
    }

    #[test]
    fn resolves_repeated_cb_id_to_same_value() {
        let table = lookup(&[(PlaceholderType::CbId, "shared", "cb42")]);
        let resolved = resolve_placeholders(
            "{{__BRAZESYNC.cb_id.shared__}}/a {{__BRAZESYNC.cb_id.shared__}}/b",
            &table,
        );
        assert_eq!(resolved, Ok("{{cb42}}/a {{cb42}}/b".to_string()));
    }

    #[test]
    fn aggregates_unresolved_keys() {
        let table = lookup(&[(PlaceholderType::Lid, "a", "ai8kexrxcp03")]);
        let errors = resolve_placeholders(
            "__BRAZESYNC.lid.a__ __BRAZESYNC.cb_id.b__ __BRAZESYNC.cb_id.c__",
            &table,
        )
        .unwrap_err();
        assert_eq!(errors.len(), 2);

        let mut missing = Vec::new();
        for error in &errors {
            match error {
                ResolutionError::UnknownKey { ty, key, .. } => missing.push((*ty, key.clone())),
                _ => unreachable!(),
            }
        }
        for expected in ["b", "c"] {
            assert!(missing.contains(&(PlaceholderType::CbId, expected.to_string())));
        }
    }

    #[test]
    fn placeholder_literal_round_trips() {
        let placeholder = Placeholder {
            ty: PlaceholderType::CbId,
            key: String::from("cb_hero"),
            start: 0,
            end: 0,
        };
        assert_eq!(placeholder.literal(), "__BRAZESYNC.cb_id.cb_hero__");
    }

    #[test]
    fn duplicate_lid_aborts_with_dedicated_error() {
        let table = lookup(&[(PlaceholderType::Lid, "cta", "ai8kexrxcp03")]);
        let errors = resolve_placeholders(
            "<a>__BRAZESYNC.lid.cta__</a> <a>__BRAZESYNC.lid.cta__</a>",
            &table,
        )
        .unwrap_err();
        let reported = errors
            .iter()
            .filter_map(|error| match error {
                ResolutionError::DuplicateLidKey { key, occurrences } => {
                    Some((key.as_str(), occurrences.len()))
                }
                _ => None,
            })
            .any(|duplicate| duplicate == ("cta", 2));
        assert!(reported);
    }

    #[test]
    fn duplicate_cb_id_is_not_an_error() {
        // Referencing the same content block twice is normal usage.
        let table = lookup(&[(PlaceholderType::CbId, "x", "cb42")]);
        let resolved = resolve_placeholders(
            "{{cb.__BRAZESYNC.cb_id.x__}} {{cb.__BRAZESYNC.cb_id.x__}}",
            &table,
        );
        assert_eq!(resolved, Ok("{{cb.cb42}} {{cb.cb42}}".to_string()));
    }

    #[test]
    fn body_without_placeholders_passes_through() {
        let body = "no placeholders here";
        let resolved = resolve_placeholders(body, &BTreeMap::new());
        assert_eq!(resolved, Ok(body.to_string()));
    }

    #[test]
    fn suspicious_catches_typo_with_underscore_key() {
        // Regression: the loose regex once used `[^_\s]+`, which excluded
        // underscores and silently let `__BRAZSYNC.lid.spring_sale__`
        // (missing `E`, underscored key) through without a warning.
        let warnings = find_suspicious_placeholders("__BRAZSYNC.lid.spring_sale__");
        assert_eq!(warnings, ["__BRAZSYNC.lid.spring_sale__"]);
    }

    #[test]
    fn does_not_swallow_text_across_envelope_boundary() {
        // Regression: a regex with a greedy `[a-z0-9_]*` key class merged
        // `__BRAZESYNC.lid.foo__hello__BRAZESYNC.lid.bar__` into a single
        // placeholder with key=`foo__hello`. The parser must stop at the
        // nearest `__` so that both placeholders are recovered.
        let body = "__BRAZESYNC.lid.foo__hello__BRAZESYNC.lid.bar__";
        let found = extract_placeholders(body);
        assert_eq!(keys_of(&found), ["foo", "bar"]);
        let tokens: Vec<&str> = found.iter().map(|p| &body[p.start..p.end]).collect();
        assert_eq!(tokens, ["__BRAZESYNC.lid.foo__", "__BRAZESYNC.lid.bar__"]);
    }

    #[test]
    fn adjacent_placeholders_share_no_underscore() {
        // `____` between two placeholders: the earlier greedy regex captured
        // key=`foo__`. The parser should treat the inner `__` as the close.
        let found = extract_placeholders("__BRAZESYNC.lid.foo____BRAZESYNC.lid.bar__");
        assert_eq!(keys_of(&found), ["foo", "bar"]);
    }

    #[test]
    fn stops_at_nearest_close_envelope() {
        let body = "__BRAZESYNC.lid.foo____bar__";
        let found = extract_placeholders(body);
        assert_eq!(keys_of(&found), ["foo"]);
        assert_eq!(&body[found[0].end..], "__bar__");
    }

    #[test]
    fn triple_underscore_parses_as_key_plus_trailing() {
        // `__BRAZESYNC.lid.link___` → key=`link`, with one `_` left over.
        let body = "__BRAZESYNC.lid.link___";
        let found = extract_placeholders(body);
        assert_eq!(keys_of(&found), ["link"]);
        assert_eq!(&body[found[0].end..], "_");
    }

    #[test]
    fn underscored_keys_still_extract() {
        // Sanity: legitimate underscored keys survive the
        // boundary-respecting parser.
        let found =
            extract_placeholders("__BRAZESYNC.lid.spring_sale__ x __BRAZESYNC.cb_id.promo_banner__");
        assert_eq!(keys_of(&found), ["spring_sale", "promo_banner"]);
    }
}
419}