Skip to main content

braze_sync/values/
placeholder.rs

1//! Placeholder extraction and resolution for `__BRAZESYNC.<type>.<key>__`.
2//!
3//! Syntax is fixed by RFC `feat-per-env-values.md` §2.3:
4//!   - Double-underscore envelope
5//!   - Dot namespace
6//!   - `<type>` ∈ {`lid`, `cb_id`, `custom`, `global`}
7//!   - `<key>` matches `^[a-z][a-z0-9_]*$`
8//!
9//! This module is intentionally *resource-shape-agnostic*: it returns the
10//! `(type, key)` pairs and lets callers (Phase 2+ wiring) pick the right
11//! namespace (resource-local vs global, field-scoped vs resource-scoped).
12
13use regex_lite::Regex;
14use std::collections::BTreeMap;
15use std::sync::OnceLock;
16
/// The `<type>` segment of a `__BRAZESYNC.<type>.<key>__` placeholder.
///
/// Mirrors the RFC §2.3 enumeration exactly. Declaration order matters:
/// the derived `PartialOrd`/`Ord` compare variants in the order listed.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum PlaceholderType {
    /// Written `lid` inside a placeholder.
    Lid,
    /// Written `cb_id` inside a placeholder.
    CbId,
    /// Written `custom` inside a placeholder.
    Custom,
    /// Written `global` inside a placeholder.
    Global,
}
25
26impl PlaceholderType {
27    pub fn as_str(&self) -> &'static str {
28        match self {
29            PlaceholderType::Lid => "lid",
30            PlaceholderType::CbId => "cb_id",
31            PlaceholderType::Custom => "custom",
32            PlaceholderType::Global => "global",
33        }
34    }
35
36    fn parse(s: &str) -> Option<Self> {
37        match s {
38            "lid" => Some(Self::Lid),
39            "cb_id" => Some(Self::CbId),
40            "custom" => Some(Self::Custom),
41            "global" => Some(Self::Global),
42            _ => None,
43        }
44    }
45}
46
47/// One placeholder occurrence within a body string.
48#[derive(Debug, Clone, PartialEq, Eq)]
49pub struct Placeholder {
50    pub ty: PlaceholderType,
51    pub key: String,
52    /// Byte offset where the literal `__BRAZESYNC.…__` token begins.
53    pub start: usize,
54    /// Byte offset (exclusive) where it ends.
55    pub end: usize,
56}
57
58impl Placeholder {
59    /// The textual form, useful for error messages: `__BRAZESYNC.lid.foo__`.
60    pub fn literal(&self) -> String {
61        format!("__BRAZESYNC.{}.{}__", self.ty.as_str(), self.key)
62    }
63}
64
/// Literal opening of every strict placeholder: the leading `__` of the
/// envelope, the `BRAZESYNC` marker, and the first dot separator.
const PREFIX: &str = "__BRAZESYNC.";
/// Closing half of the double-underscore envelope. Its length is also used
/// by `extract_placeholders` to step past the opening `__` of a rejected
/// candidate.
const CLOSE: &str = "__";
67
68fn key_re() -> &'static Regex {
69    static RE: OnceLock<Regex> = OnceLock::new();
70    RE.get_or_init(|| Regex::new(r"^[a-z][a-z0-9_]*$").expect("key regex is valid"))
71}
72
73/// Loose envelope-only regex per RFC §2.3 warning rule. Catches typos like
74/// `__BRAZSYNC.…__` or unknown types like `__BRAZESYNC.url.foo__` so they
75/// can be surfaced as warnings rather than silently passing through. The
76/// inner classes deliberately allow `_` so typo-shaped placeholders whose
77/// key contains an underscore (e.g. `spring_sale`) are still caught.
78fn loose_re() -> &'static Regex {
79    static RE: OnceLock<Regex> = OnceLock::new();
80    RE.get_or_init(|| {
81        Regex::new(r"__BRAZE?SYNC\.[A-Za-z0-9_]+\.[A-Za-z0-9_]+__")
82            .expect("loose placeholder regex is valid")
83    })
84}
85
86/// Extract every strict `__BRAZESYNC.<type>.<key>__` occurrence in `body`,
87/// in order of appearance.
88///
89/// Parsing strategy: anchor on the literal `__BRAZESYNC.` prefix and the
90/// *nearest* closing `__` (left-most), so a regex with greedy `[a-z0-9_]*`
91/// can't merge two adjacent placeholders into one.
92///
93/// Legacy recovery: v0.14.2 and earlier emitted exactly two trailing-`_`
94/// keys — `lid.link_` and `cb_id.cb_` (empty-slug fallbacks; see
95/// [`slug_for_cb_id`] / [`slug_for_lid`]). The rendered envelope collapses
96/// to e.g. `…link___` (key's trailing `_` + close `__`). Recovery is
97/// scoped to those two exact `(type, key)` pairs so that hand-written
98/// bodies like `__BRAZESYNC.custom.foo___bar` continue to parse as
99/// key=`foo` + literal `_bar` (rather than silently mutating the key into
100/// `foo_`). The double-`_` guard additionally keeps
101/// `__BRAZESYNC.lid.link____bar__` parsed as key=`link` rather than
102/// absorbing the adjacent `__bar__` token.
103pub fn extract_placeholders(body: &str) -> Vec<Placeholder> {
104    let mut out = Vec::new();
105    let bytes = body.as_bytes();
106    let mut i = 0;
107    while i + PREFIX.len() <= bytes.len() {
108        let Some(rel) = body[i..].find(PREFIX) else {
109            break;
110        };
111        let start = i + rel;
112        let inner_start = start + PREFIX.len();
113        let Some(rel_close) = body[inner_start..].find(CLOSE) else {
114            break;
115        };
116        let close_start = inner_start + rel_close;
117        let mut end = close_start + CLOSE.len();
118        let inner = &body[inner_start..close_start];
119        if let Some((ty_str, key)) = inner.split_once('.') {
120            if let (Some(ty), true) = (PlaceholderType::parse(ty_str), key_re().is_match(key)) {
121                let is_legacy_empty_slug = (ty == PlaceholderType::Lid && key == "link")
122                    || (ty == PlaceholderType::CbId && key == "cb");
123                let mut key = key.to_string();
124                if is_legacy_empty_slug
125                    && bytes.get(end) == Some(&b'_')
126                    && bytes.get(end + 1) != Some(&b'_')
127                {
128                    key.push('_');
129                    end += 1;
130                }
131                out.push(Placeholder {
132                    ty,
133                    key,
134                    start,
135                    end,
136                });
137                i = end;
138                continue;
139            }
140        }
141        // Not a valid strict placeholder; skip past the opening `__` so the
142        // remainder is still scanned (and may be surfaced by the loose pass).
143        i = start + CLOSE.len();
144    }
145    out
146}
147
148/// Find loose envelope matches that don't satisfy the strict pattern.
149/// Caller surfaces these as warnings (RFC §2.3).
150pub fn find_suspicious_placeholders(body: &str) -> Vec<String> {
151    let strict_spans: Vec<(usize, usize)> = extract_placeholders(body)
152        .into_iter()
153        .map(|p| (p.start, p.end))
154        .collect();
155    loose_re()
156        .find_iter(body)
157        .filter(|m| {
158            // Overlap (not equality) — the greedy loose regex can extend past
159            // a valid envelope into adjacent `__...__` text. See regression
160            // tests below.
161            !strict_spans
162                .iter()
163                .any(|&(s, e)| m.start() < e && s < m.end())
164        })
165        .map(|m| m.as_str().to_string())
166        .collect()
167}
168
169/// What the resolver couldn't satisfy. Aggregated by the pre-flight phase
170/// (RFC §2.4 / §3 Q7) so apply abort can report every failure at once.
171#[derive(Debug, Clone, PartialEq, Eq)]
172pub enum ResolutionError {
173    UnknownKey {
174        ty: PlaceholderType,
175        key: String,
176        start: usize,
177    },
178    /// Same `lid` key referenced more than once in a single body / field.
179    /// RFC §5 edge case: lid is a per-click-context ID so re-use is
180    /// conceptually wrong — abort rather than substitute the same value.
181    /// `occurrences` holds the byte offsets of every reference so the
182    /// failure report can point operators at the duplicates directly.
183    DuplicateLidKey {
184        key: String,
185        occurrences: Vec<usize>,
186    },
187}
188
/// Flat key for the resolver's lookup table.
///
/// Phase 1 deliberately stays resource-shape-agnostic: callers supply a
/// flat `(type, key) -> value` map and the resolver doesn't know whether
/// it came from a resource-local namespace, a field-level namespace, or
/// the `globals.custom` scope. Phase 2+ wiring composes the table from
/// the right places per RFC §2.2.
///
/// The `String` component is the placeholder key exactly as extracted
/// (see [`Placeholder::key`]); [`resolve_placeholders`] looks entries up
/// by this pair.
pub type LookupKey = (PlaceholderType, String);
197
198/// Resolve every placeholder in `body` against `lookup`. Returns the
199/// resolved body on success, or every unresolved placeholder on failure
200/// (errors are aggregated, never short-circuited — matches §3 Q7).
201pub fn resolve_placeholders(
202    body: &str,
203    lookup: &BTreeMap<LookupKey, String>,
204) -> Result<String, Vec<ResolutionError>> {
205    let placeholders = extract_placeholders(body);
206    let mut errors = Vec::new();
207
208    let mut lid_occurrences: BTreeMap<String, Vec<usize>> = BTreeMap::new();
209    for ph in &placeholders {
210        if matches!(ph.ty, PlaceholderType::Lid) {
211            lid_occurrences
212                .entry(ph.key.clone())
213                .or_default()
214                .push(ph.start);
215        }
216    }
217    for (key, occurrences) in lid_occurrences {
218        if occurrences.len() > 1 {
219            errors.push(ResolutionError::DuplicateLidKey { key, occurrences });
220        }
221    }
222
223    for ph in &placeholders {
224        let key: LookupKey = (ph.ty, ph.key.clone());
225        if !lookup.contains_key(&key) {
226            errors.push(ResolutionError::UnknownKey {
227                ty: ph.ty,
228                key: ph.key.clone(),
229                start: ph.start,
230            });
231        }
232    }
233
234    if !errors.is_empty() {
235        return Err(errors);
236    }
237
238    // Substitute back-to-front so byte offsets remain stable across edits.
239    let mut out = body.to_string();
240    for ph in placeholders.iter().rev() {
241        let key: LookupKey = (ph.ty, ph.key.clone());
242        let value = lookup
243            .get(&key)
244            .expect("missing key would have been caught above");
245        out.replace_range(ph.start..ph.end, value);
246    }
247    Ok(out)
248}
249
// Unit tests for strict extraction, the loose "suspicious" pass, and
// resolution (including the v0.14.2 trailing-underscore recovery).
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a lookup table from `(type, key, value)` triples.
    fn lookup(pairs: &[(PlaceholderType, &str, &str)]) -> BTreeMap<LookupKey, String> {
        pairs
            .iter()
            .map(|(t, k, v)| ((*t, (*k).to_string()), (*v).to_string()))
            .collect()
    }

    #[test]
    fn extracts_strict_placeholders_in_order() {
        let body = "head __BRAZESYNC.lid.spring_sale__ mid __BRAZESYNC.cb_id.cb_hero__ tail";
        let found = extract_placeholders(body);
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].ty, PlaceholderType::Lid);
        assert_eq!(found[0].key, "spring_sale");
        assert_eq!(found[1].ty, PlaceholderType::CbId);
        assert_eq!(found[1].key, "cb_hero");
        assert!(found[0].start < found[1].start);
    }

    #[test]
    fn rejects_unknown_type_in_strict_pass() {
        // `url` is not one of the four RFC §2.3 types.
        let body = "x __BRAZESYNC.url.foo__ y";
        assert!(extract_placeholders(body).is_empty());
    }

    #[test]
    fn rejects_uppercase_key_in_strict_pass() {
        // Keys must be lowercase per the key grammar.
        let body = "x __BRAZESYNC.lid.Foo__ y";
        assert!(extract_placeholders(body).is_empty());
    }

    #[test]
    fn rejects_digit_leading_key_in_strict_pass() {
        // Keys must start with a letter per the key grammar.
        let body = "x __BRAZESYNC.lid.1foo__ y";
        assert!(extract_placeholders(body).is_empty());
    }

    #[test]
    fn suspicious_picks_up_typos_and_unknown_types() {
        let body = "x __BRAZSYNC.lid.foo__ y __BRAZESYNC.url.bar__ z";
        let warns = find_suspicious_placeholders(body);
        assert_eq!(warns.len(), 2);
        assert!(warns.iter().any(|s| s.contains("BRAZSYNC")));
        assert!(warns.iter().any(|s| s.contains(".url.")));
    }

    #[test]
    fn suspicious_excludes_strict_matches() {
        let body = "__BRAZESYNC.lid.ok__";
        assert!(find_suspicious_placeholders(body).is_empty());
    }

    #[test]
    fn suspicious_ignores_trailing_double_underscore_text() {
        // Regression: greedy loose regex extends past a valid placeholder
        // into `__bold__`-style adjacent text and reports a span like
        // (0, 26) for `__BRAZESYNC.lid.foo__bar__`. That span overlaps the
        // real strict placeholder (0, 21), so it must not be surfaced.
        let body = "__BRAZESYNC.lid.foo__bar__";
        assert!(find_suspicious_placeholders(body).is_empty());
    }

    #[test]
    fn suspicious_ignores_adjacent_placeholders_sharing_underscores() {
        // Regression: with two adjacent strict placeholders joined by an
        // extra `__`, the loose regex finds a single match that overlaps
        // both strict spans. Overlap means "already covered" — no warning.
        let body = "__BRAZESYNC.lid.foo____BRAZESYNC.lid.bar__";
        assert!(find_suspicious_placeholders(body).is_empty());
    }

    #[test]
    fn resolves_when_all_keys_present() {
        let body = "before __BRAZESYNC.lid.cta__ middle __BRAZESYNC.custom.host__ end";
        let map = lookup(&[
            (PlaceholderType::Lid, "cta", "ai8kexrxcp03"),
            (PlaceholderType::Custom, "host", "api-prod.example.com"),
        ]);
        let resolved = resolve_placeholders(body, &map).unwrap();
        assert_eq!(
            resolved,
            "before ai8kexrxcp03 middle api-prod.example.com end"
        );
    }

    #[test]
    fn resolves_repeated_keys_to_same_value() {
        // Re-use of a non-`lid` key substitutes the same value each time.
        let body = "__BRAZESYNC.global.host__/a __BRAZESYNC.global.host__/b";
        let map = lookup(&[(PlaceholderType::Global, "host", "example.com")]);
        let resolved = resolve_placeholders(body, &map).unwrap();
        assert_eq!(resolved, "example.com/a example.com/b");
    }

    #[test]
    fn aggregates_unresolved_keys() {
        // Two of three keys are missing: both must be reported together.
        let body = "__BRAZESYNC.lid.a__ __BRAZESYNC.cb_id.b__ __BRAZESYNC.custom.c__";
        let map = lookup(&[(PlaceholderType::Lid, "a", "ai8kexrxcp03")]);
        let err = resolve_placeholders(body, &map).unwrap_err();
        assert_eq!(err.len(), 2);
        let keys: Vec<_> = err
            .iter()
            .map(|e| match e {
                ResolutionError::UnknownKey { ty, key, .. } => (*ty, key.clone()),
                ResolutionError::DuplicateLidKey { .. } => unreachable!(),
            })
            .collect();
        assert!(keys.contains(&(PlaceholderType::CbId, "b".to_string())));
        assert!(keys.contains(&(PlaceholderType::Custom, "c".to_string())));
    }

    #[test]
    fn placeholder_literal_round_trips() {
        // `literal()` depends only on type and key; offsets are irrelevant.
        let ph = Placeholder {
            ty: PlaceholderType::CbId,
            key: "cb_hero".into(),
            start: 0,
            end: 0,
        };
        assert_eq!(ph.literal(), "__BRAZESYNC.cb_id.cb_hero__");
    }

    #[test]
    fn duplicate_lid_aborts_with_dedicated_error() {
        // The key is present in the map, so the only failure is the re-use.
        let body = "<a>__BRAZESYNC.lid.cta__</a> <a>__BRAZESYNC.lid.cta__</a>";
        let map = lookup(&[(PlaceholderType::Lid, "cta", "ai8kexrxcp03")]);
        let err = resolve_placeholders(body, &map).unwrap_err();
        assert!(err.iter().any(|e| matches!(
            e,
            ResolutionError::DuplicateLidKey { key, occurrences }
                if key == "cta" && occurrences.len() == 2
        )));
    }

    #[test]
    fn duplicate_cb_id_is_not_an_error() {
        // cb_id / custom / global re-use is normal substitution per §5.
        let body = "{{cb.__BRAZESYNC.cb_id.x__}} {{cb.__BRAZESYNC.cb_id.x__}}";
        let map = lookup(&[(PlaceholderType::CbId, "x", "cb42")]);
        let out = resolve_placeholders(body, &map).unwrap();
        assert_eq!(out, "{{cb.cb42}} {{cb.cb42}}");
    }

    #[test]
    fn body_without_placeholders_passes_through() {
        let body = "no placeholders here";
        let map = BTreeMap::new();
        assert_eq!(resolve_placeholders(body, &map).unwrap(), body);
    }

    #[test]
    fn suspicious_catches_typo_with_underscore_key() {
        // Regression: loose regex used to use `[^_\s]+` which excluded
        // underscores, silently letting `__BRAZSYNC.lid.spring_sale__`
        // (missing `E`, underscored key) through without a warning.
        let body = "__BRAZSYNC.lid.spring_sale__";
        let warns = find_suspicious_placeholders(body);
        assert_eq!(warns, vec!["__BRAZSYNC.lid.spring_sale__".to_string()]);
    }

    #[test]
    fn does_not_swallow_text_across_envelope_boundary() {
        // Regression: a regex with a greedy `[a-z0-9_]*` key class merged
        // `__BRAZESYNC.lid.foo__hello__BRAZESYNC.lid.bar__` into one
        // placeholder with key=`foo__hello`. The parser must stop at the
        // nearest `__` so both placeholders are recovered.
        let body = "__BRAZESYNC.lid.foo__hello__BRAZESYNC.lid.bar__";
        let ps = extract_placeholders(body);
        assert_eq!(ps.len(), 2);
        assert_eq!(ps[0].key, "foo");
        assert_eq!(ps[1].key, "bar");
        assert_eq!(&body[ps[0].start..ps[0].end], "__BRAZESYNC.lid.foo__");
        assert_eq!(&body[ps[1].start..ps[1].end], "__BRAZESYNC.lid.bar__");
    }

    #[test]
    fn adjacent_placeholders_share_no_underscore() {
        // `____` between two placeholders: prior greedy regex captured
        // key=`foo__`. The parser should treat the inner `__` as the close.
        let body = "__BRAZESYNC.lid.foo____BRAZESYNC.lid.bar__";
        let ps = extract_placeholders(body);
        assert_eq!(ps.len(), 2);
        assert_eq!(ps[0].key, "foo");
        assert_eq!(ps[1].key, "bar");
    }

    #[test]
    fn trailing_underscore_key_extracts_when_envelope_appears_to_have_three_underscores() {
        // Regression for v0.14.2 templatize output: when a URL slug is
        // empty the fallback key is `link_`, and the rendered envelope
        // collapses to `___` (close `__` + trailing `_` from the key).
        // The nearest-close parse closes at `link`; legacy recovery then
        // absorbs the single trailing `_`, so the key must be `link_`.
        let body = "lid: '__BRAZESYNC.lid.link___'";
        let ps = extract_placeholders(body);
        assert_eq!(ps.len(), 1);
        assert_eq!(ps[0].key, "link_");
        assert_eq!(ps[0].ty, PlaceholderType::Lid);
    }

    #[test]
    fn placeholder_followed_by_unrelated_double_underscore_token_does_not_absorb_it() {
        // Regression: an earlier right-most-close strategy would greedily
        // extend the key across any adjacent `[a-z0-9_]+__` token, e.g.
        // Python `__init__` or Markdown bold immediately after a
        // placeholder. The parser must stop at the nearest `__` close
        // (with at most one trailing `_` for legacy recovery) and leave
        // the rest of the body untouched.
        let body = "__BRAZESYNC.lid.foo____bar__";
        let ps = extract_placeholders(body);
        assert_eq!(ps.len(), 1);
        assert_eq!(ps[0].key, "foo");
        assert_eq!(&body[ps[0].end..], "__bar__");
    }

    #[test]
    fn non_legacy_key_followed_by_underscore_text_is_not_absorbed() {
        // Recovery is gated on the v0.14.2 empty-slug fallbacks
        // (`lid.link_`, `cb_id.cb_`); any other `(type, key)` must
        // leave a trailing `_<text>` in the surrounding body.
        let body = "__BRAZESYNC.custom.foo___bar";
        let ps = extract_placeholders(body);
        assert_eq!(ps.len(), 1);
        assert_eq!(ps[0].key, "foo");
        assert_eq!(ps[0].ty, PlaceholderType::Custom);
        assert_eq!(&body[ps[0].end..], "_bar");

        let body = "__BRAZESYNC.lid.other___tail";
        let ps = extract_placeholders(body);
        assert_eq!(ps.len(), 1);
        assert_eq!(ps[0].key, "other");
        assert_eq!(&body[ps[0].end..], "_tail");
    }

    #[test]
    fn cb_id_empty_slug_fallback_extracts_with_trailing_underscore() {
        // Second of the two legacy pairs: `cb_id.cb_`.
        let body = "__BRAZESYNC.cb_id.cb___";
        let ps = extract_placeholders(body);
        assert_eq!(ps.len(), 1);
        assert_eq!(ps[0].key, "cb_");
        assert_eq!(ps[0].ty, PlaceholderType::CbId);
    }

    #[test]
    fn unresolved_trailing_underscore_key_reports_full_key() {
        // If `link_` isn't in the values map, the error must name
        // `link_` (not `link`) so the operator can fix the right entry.
        let body = "__BRAZESYNC.lid.link___";
        let map = lookup(&[(PlaceholderType::Lid, "ok", "ai8kexrxcp03")]);
        let err = resolve_placeholders(body, &map).unwrap_err();
        assert!(err.iter().any(|e| matches!(
            e,
            ResolutionError::UnknownKey { key, .. } if key == "link_"
        )));
    }

    #[test]
    fn underscored_keys_still_extract() {
        // Sanity: legitimate underscored keys (RFC §2.3 allows them) are
        // not broken by the boundary-respecting parser.
        let body = "__BRAZESYNC.lid.spring_sale__ x __BRAZESYNC.custom.api_host__";
        let ps = extract_placeholders(body);
        assert_eq!(ps.len(), 2);
        assert_eq!(ps[0].key, "spring_sale");
        assert_eq!(ps[1].key, "api_host");
    }
}