Skip to main content

braze_sync/values/
placeholder.rs

1//! Placeholder extraction and resolution for `__BRAZESYNC.<type>.<key>__`.
2//!
3//! Syntax is fixed by RFC `feat-per-env-values.md` §2.3:
4//!   - Double-underscore envelope
5//!   - Dot namespace
6//!   - `<type>` ∈ {`lid`, `cb_id`, `custom`, `global`}
7//!   - `<key>` matches `^[a-z][a-z0-9_]*$`
8//!
9//! This module is intentionally *resource-shape-agnostic*: it returns the
10//! `(type, key)` pairs and lets callers (Phase 2+ wiring) pick the right
11//! namespace (resource-local vs global, field-scoped vs resource-scoped).
12
13use regex_lite::Regex;
14use std::collections::BTreeMap;
15use std::sync::OnceLock;
16
/// The `<type>` segment of a placeholder token, restricted to the four
/// names enumerated in RFC §2.3.
///
/// The derived ordering follows declaration order, so keep the variants
/// in this sequence.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PlaceholderType {
    Lid,
    CbId,
    Custom,
    Global,
}

impl PlaceholderType {
    /// Every variant, in declaration order. Used to derive `parse` from
    /// `as_str` so the two spellings tables cannot drift apart.
    const ALL: [Self; 4] = [Self::Lid, Self::CbId, Self::Custom, Self::Global];

    /// The exact lowercase spelling that appears inside the token.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Lid => "lid",
            Self::CbId => "cb_id",
            Self::Custom => "custom",
            Self::Global => "global",
        }
    }

    /// Inverse of `as_str`: maps a spelling back to its variant, or `None`
    /// when `s` is not one of the four known names (matching is exact and
    /// case-sensitive).
    fn parse(s: &str) -> Option<Self> {
        Self::ALL.iter().copied().find(|ty| ty.as_str() == s)
    }
}
46
47/// One placeholder occurrence within a body string.
48#[derive(Debug, Clone, PartialEq, Eq)]
49pub struct Placeholder {
50    pub ty: PlaceholderType,
51    pub key: String,
52    /// Byte offset where the literal `__BRAZESYNC.…__` token begins.
53    pub start: usize,
54    /// Byte offset (exclusive) where it ends.
55    pub end: usize,
56}
57
58impl Placeholder {
59    /// The textual form, useful for error messages: `__BRAZESYNC.lid.foo__`.
60    pub fn literal(&self) -> String {
61        format!("__BRAZESYNC.{}.{}__", self.ty.as_str(), self.key)
62    }
63}
64
/// Literal opener of a strict placeholder, up to and including the first dot.
const PREFIX: &str = "__BRAZESYNC.";
/// Literal closer of the placeholder envelope.
const CLOSE: &str = "__";
67
68fn key_re() -> &'static Regex {
69    static RE: OnceLock<Regex> = OnceLock::new();
70    RE.get_or_init(|| Regex::new(r"^[a-z][a-z0-9_]*$").expect("key regex is valid"))
71}
72
73/// Loose envelope-only regex per RFC §2.3 warning rule. Catches typos like
74/// `__BRAZSYNC.…__` or unknown types like `__BRAZESYNC.url.foo__` so they
75/// can be surfaced as warnings rather than silently passing through. The
76/// inner classes deliberately allow `_` so typo-shaped placeholders whose
77/// key contains an underscore (e.g. `spring_sale`) are still caught.
78fn loose_re() -> &'static Regex {
79    static RE: OnceLock<Regex> = OnceLock::new();
80    RE.get_or_init(|| {
81        Regex::new(r"__BRAZE?SYNC\.[A-Za-z0-9_]+\.[A-Za-z0-9_]+__")
82            .expect("loose placeholder regex is valid")
83    })
84}
85
86/// Extract every strict `__BRAZESYNC.<type>.<key>__` occurrence in `body`,
87/// in order of appearance. Parsed by anchoring on the literal `__BRAZESYNC.`
88/// prefix and the *nearest* closing `__`, so the envelope is honored even
89/// when `<key>` legitimately contains underscores. A regex with a greedy
90/// `[a-z0-9_]*` key class would otherwise swallow text across a `__`
91/// boundary and merge two intended placeholders into one.
92pub fn extract_placeholders(body: &str) -> Vec<Placeholder> {
93    let mut out = Vec::new();
94    let bytes = body.as_bytes();
95    let mut i = 0;
96    while i + PREFIX.len() <= bytes.len() {
97        let Some(rel) = body[i..].find(PREFIX) else {
98            break;
99        };
100        let start = i + rel;
101        let inner_start = start + PREFIX.len();
102        let Some(rel_close) = body[inner_start..].find(CLOSE) else {
103            break;
104        };
105        let close_start = inner_start + rel_close;
106        let end = close_start + CLOSE.len();
107        let inner = &body[inner_start..close_start];
108        if let Some((ty_str, key)) = inner.split_once('.') {
109            if let (Some(ty), true) = (PlaceholderType::parse(ty_str), key_re().is_match(key)) {
110                out.push(Placeholder {
111                    ty,
112                    key: key.to_string(),
113                    start,
114                    end,
115                });
116                i = end;
117                continue;
118            }
119        }
120        // Not a valid strict placeholder; skip past the opening `__` so the
121        // remainder is still scanned (and may be surfaced by the loose pass).
122        i = start + CLOSE.len();
123    }
124    out
125}
126
127/// Find loose envelope matches that don't satisfy the strict pattern.
128/// Caller surfaces these as warnings (RFC §2.3).
129pub fn find_suspicious_placeholders(body: &str) -> Vec<String> {
130    let strict_spans: Vec<(usize, usize)> = extract_placeholders(body)
131        .into_iter()
132        .map(|p| (p.start, p.end))
133        .collect();
134    loose_re()
135        .find_iter(body)
136        .filter(|m| {
137            // Overlap (not equality) — the greedy loose regex can extend past
138            // a valid envelope into adjacent `__...__` text. See regression
139            // tests below.
140            !strict_spans
141                .iter()
142                .any(|&(s, e)| m.start() < e && s < m.end())
143        })
144        .map(|m| m.as_str().to_string())
145        .collect()
146}
147
148/// What the resolver couldn't satisfy. Aggregated by the pre-flight phase
149/// (RFC §2.4 / §3 Q7) so apply abort can report every failure at once.
150#[derive(Debug, Clone, PartialEq, Eq)]
151pub enum ResolutionError {
152    UnknownKey {
153        ty: PlaceholderType,
154        key: String,
155        start: usize,
156    },
157    /// Same `lid` key referenced more than once in a single body / field.
158    /// RFC §5 edge case: lid is a per-click-context ID so re-use is
159    /// conceptually wrong — abort rather than substitute the same value.
160    /// `occurrences` holds the byte offsets of every reference so the
161    /// failure report can point operators at the duplicates directly.
162    DuplicateLidKey {
163        key: String,
164        occurrences: Vec<usize>,
165    },
166}
167
168/// Flat key for the resolver's lookup table.
169///
170/// Phase 1 deliberately stays resource-shape-agnostic: callers supply a
171/// flat `(type, key) -> value` map and the resolver doesn't know whether
172/// it came from a resource-local namespace, a field-level namespace, or
173/// the `globals.custom` scope. Phase 2+ wiring composes the table from
174/// the right places per RFC §2.2.
175pub type LookupKey = (PlaceholderType, String);
176
177/// Resolve every placeholder in `body` against `lookup`. Returns the
178/// resolved body on success, or every unresolved placeholder on failure
179/// (errors are aggregated, never short-circuited — matches §3 Q7).
180pub fn resolve_placeholders(
181    body: &str,
182    lookup: &BTreeMap<LookupKey, String>,
183) -> Result<String, Vec<ResolutionError>> {
184    let placeholders = extract_placeholders(body);
185    let mut errors = Vec::new();
186
187    let mut lid_occurrences: BTreeMap<String, Vec<usize>> = BTreeMap::new();
188    for ph in &placeholders {
189        if matches!(ph.ty, PlaceholderType::Lid) {
190            lid_occurrences
191                .entry(ph.key.clone())
192                .or_default()
193                .push(ph.start);
194        }
195    }
196    for (key, occurrences) in lid_occurrences {
197        if occurrences.len() > 1 {
198            errors.push(ResolutionError::DuplicateLidKey { key, occurrences });
199        }
200    }
201
202    for ph in &placeholders {
203        let key: LookupKey = (ph.ty, ph.key.clone());
204        if !lookup.contains_key(&key) {
205            errors.push(ResolutionError::UnknownKey {
206                ty: ph.ty,
207                key: ph.key.clone(),
208                start: ph.start,
209            });
210        }
211    }
212
213    if !errors.is_empty() {
214        return Err(errors);
215    }
216
217    // Substitute back-to-front so byte offsets remain stable across edits.
218    let mut out = body.to_string();
219    for ph in placeholders.iter().rev() {
220        let key: LookupKey = (ph.ty, ph.key.clone());
221        let value = lookup
222            .get(&key)
223            .expect("missing key would have been caught above");
224        out.replace_range(ph.start..ph.end, value);
225    }
226    Ok(out)
227}
228
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a resolver table from `(type, key, value)` triples.
    fn lookup(pairs: &[(PlaceholderType, &str, &str)]) -> BTreeMap<LookupKey, String> {
        let mut table = BTreeMap::new();
        for &(ty, key, value) in pairs {
            table.insert((ty, key.to_string()), value.to_string());
        }
        table
    }

    #[test]
    fn extracts_strict_placeholders_in_order() {
        let found = extract_placeholders(
            "head __BRAZESYNC.lid.spring_sale__ mid __BRAZESYNC.cb_id.cb_hero__ tail",
        );
        let summary: Vec<(PlaceholderType, &str)> =
            found.iter().map(|p| (p.ty, p.key.as_str())).collect();
        assert_eq!(
            summary,
            vec![
                (PlaceholderType::Lid, "spring_sale"),
                (PlaceholderType::CbId, "cb_hero"),
            ]
        );
        assert!(found[0].start < found[1].start);
    }

    #[test]
    fn rejects_unknown_type_in_strict_pass() {
        assert!(extract_placeholders("x __BRAZESYNC.url.foo__ y").is_empty());
    }

    #[test]
    fn rejects_uppercase_key_in_strict_pass() {
        assert!(extract_placeholders("x __BRAZESYNC.lid.Foo__ y").is_empty());
    }

    #[test]
    fn rejects_digit_leading_key_in_strict_pass() {
        assert!(extract_placeholders("x __BRAZESYNC.lid.1foo__ y").is_empty());
    }

    #[test]
    fn suspicious_picks_up_typos_and_unknown_types() {
        let warnings =
            find_suspicious_placeholders("x __BRAZSYNC.lid.foo__ y __BRAZESYNC.url.bar__ z");
        assert_eq!(warnings.len(), 2);
        assert!(warnings.iter().any(|w| w.contains("BRAZSYNC")));
        assert!(warnings.iter().any(|w| w.contains(".url.")));
    }

    #[test]
    fn suspicious_excludes_strict_matches() {
        assert!(find_suspicious_placeholders("__BRAZESYNC.lid.ok__").is_empty());
    }

    #[test]
    fn suspicious_ignores_trailing_double_underscore_text() {
        // Regression: the greedy loose regex runs past the valid token into
        // the `__bold__`-style text after it, yielding a span like (0, 26)
        // for `__BRAZESYNC.lid.foo__bar__`. Because that overlaps the real
        // strict placeholder (0, 21), it must not be reported.
        assert!(find_suspicious_placeholders("__BRAZESYNC.lid.foo__bar__").is_empty());
    }

    #[test]
    fn suspicious_ignores_adjacent_placeholders_sharing_underscores() {
        // Regression: two strict placeholders glued together with an extra
        // `__` produce one loose match overlapping both strict spans.
        // Overlap counts as "already covered", so nothing is reported.
        let warnings = find_suspicious_placeholders("__BRAZESYNC.lid.foo____BRAZESYNC.lid.bar__");
        assert!(warnings.is_empty());
    }

    #[test]
    fn resolves_when_all_keys_present() {
        let table = lookup(&[
            (PlaceholderType::Lid, "cta", "ai8kexrxcp03"),
            (PlaceholderType::Custom, "host", "api-prod.example.com"),
        ]);
        let resolved = resolve_placeholders(
            "before __BRAZESYNC.lid.cta__ middle __BRAZESYNC.custom.host__ end",
            &table,
        )
        .unwrap();
        assert_eq!(
            resolved,
            "before ai8kexrxcp03 middle api-prod.example.com end"
        );
    }

    #[test]
    fn resolves_repeated_keys_to_same_value() {
        let table = lookup(&[(PlaceholderType::Global, "host", "example.com")]);
        let resolved = resolve_placeholders(
            "__BRAZESYNC.global.host__/a __BRAZESYNC.global.host__/b",
            &table,
        )
        .unwrap();
        assert_eq!(resolved, "example.com/a example.com/b");
    }

    #[test]
    fn aggregates_unresolved_keys() {
        let table = lookup(&[(PlaceholderType::Lid, "a", "ai8kexrxcp03")]);
        let errors = resolve_placeholders(
            "__BRAZESYNC.lid.a__ __BRAZESYNC.cb_id.b__ __BRAZESYNC.custom.c__",
            &table,
        )
        .unwrap_err();
        assert_eq!(errors.len(), 2);

        let mut missing = Vec::new();
        for error in &errors {
            match error {
                ResolutionError::UnknownKey { ty, key, .. } => missing.push((*ty, key.clone())),
                ResolutionError::DuplicateLidKey { .. } => unreachable!(),
            }
        }
        assert!(missing.contains(&(PlaceholderType::CbId, "b".to_string())));
        assert!(missing.contains(&(PlaceholderType::Custom, "c".to_string())));
    }

    #[test]
    fn placeholder_literal_round_trips() {
        let placeholder = Placeholder {
            ty: PlaceholderType::CbId,
            key: String::from("cb_hero"),
            start: 0,
            end: 0,
        };
        assert_eq!(placeholder.literal(), "__BRAZESYNC.cb_id.cb_hero__");
    }

    #[test]
    fn duplicate_lid_aborts_with_dedicated_error() {
        let table = lookup(&[(PlaceholderType::Lid, "cta", "ai8kexrxcp03")]);
        let errors = resolve_placeholders(
            "<a>__BRAZESYNC.lid.cta__</a> <a>__BRAZESYNC.lid.cta__</a>",
            &table,
        )
        .unwrap_err();
        let reported = errors.iter().any(|error| match error {
            ResolutionError::DuplicateLidKey { key, occurrences } => {
                key == "cta" && occurrences.len() == 2
            }
            ResolutionError::UnknownKey { .. } => false,
        });
        assert!(reported);
    }

    #[test]
    fn duplicate_cb_id_is_not_an_error() {
        // Re-using a cb_id / custom / global key is ordinary substitution
        // per §5.
        let table = lookup(&[(PlaceholderType::CbId, "x", "cb42")]);
        let resolved = resolve_placeholders(
            "{{cb.__BRAZESYNC.cb_id.x__}} {{cb.__BRAZESYNC.cb_id.x__}}",
            &table,
        )
        .unwrap();
        assert_eq!(resolved, "{{cb.cb42}} {{cb.cb42}}");
    }

    #[test]
    fn body_without_placeholders_passes_through() {
        let body = "no placeholders here";
        let empty = BTreeMap::new();
        assert_eq!(resolve_placeholders(body, &empty).unwrap(), body);
    }

    #[test]
    fn suspicious_catches_typo_with_underscore_key() {
        // Regression: the loose regex once used `[^_\s]+`, which excluded
        // underscores and so let `__BRAZSYNC.lid.spring_sale__` (missing
        // `E`, underscored key) slip through with no warning.
        let warnings = find_suspicious_placeholders("__BRAZSYNC.lid.spring_sale__");
        assert_eq!(warnings, vec!["__BRAZSYNC.lid.spring_sale__".to_string()]);
    }

    #[test]
    fn does_not_swallow_text_across_envelope_boundary() {
        // Regression: a regex with a greedy `[a-z0-9_]*` key class fused
        // `__BRAZESYNC.lid.foo__hello__BRAZESYNC.lid.bar__` into a single
        // placeholder with key=`foo__hello`. Stopping at the nearest `__`
        // recovers both placeholders.
        let body = "__BRAZESYNC.lid.foo__hello__BRAZESYNC.lid.bar__";
        let found = extract_placeholders(body);
        assert_eq!(found.len(), 2);
        let (first, second) = (&found[0], &found[1]);
        assert_eq!(first.key, "foo");
        assert_eq!(second.key, "bar");
        assert_eq!(&body[first.start..first.end], "__BRAZESYNC.lid.foo__");
        assert_eq!(&body[second.start..second.end], "__BRAZESYNC.lid.bar__");
    }

    #[test]
    fn adjacent_placeholders_share_no_underscore() {
        // `____` between two placeholders: the earlier greedy regex
        // captured key=`foo__`. The inner `__` must be taken as the close.
        let found = extract_placeholders("__BRAZESYNC.lid.foo____BRAZESYNC.lid.bar__");
        let keys: Vec<&str> = found.iter().map(|p| p.key.as_str()).collect();
        assert_eq!(keys, vec!["foo", "bar"]);
    }

    #[test]
    fn underscored_keys_still_extract() {
        // Sanity: legitimately underscored keys (allowed by RFC §2.3)
        // survive the boundary-respecting parser.
        let found =
            extract_placeholders("__BRAZESYNC.lid.spring_sale__ x __BRAZESYNC.custom.api_host__");
        let keys: Vec<&str> = found.iter().map(|p| p.key.as_str()).collect();
        assert_eq!(keys, vec!["spring_sale", "api_host"]);
    }
}