Skip to main content

xript_runtime/
fragment.rs

1use std::collections::{HashMap, HashSet};
2
3use regex::Regex;
4
5use crate::error::{Result, XriptError};
6use crate::manifest::{FragmentDeclaration, ModManifest};
7
8pub fn sanitize_html(input: &str) -> String {
9    if input.is_empty() {
10        return String::new();
11    }
12
13    let mut builder = ammonia::Builder::default();
14
15    let allowed_tags: HashSet<&str> = [
16        "div", "span", "p",
17        "h1", "h2", "h3", "h4", "h5", "h6",
18        "ul", "ol", "li", "dl", "dt", "dd",
19        "table", "thead", "tbody", "tfoot", "tr", "td", "th",
20        "caption", "col", "colgroup",
21        "figure", "figcaption", "blockquote", "pre", "code",
22        "em", "strong", "b", "i", "u", "s", "small", "sub", "sup",
23        "br", "hr",
24        "img", "picture", "source", "audio", "video", "track",
25        "details", "summary",
26        "section", "article", "aside", "nav",
27        "header", "footer", "main",
28        "a", "abbr", "mark", "time", "wbr",
29        "style",
30        "input", "textarea", "select", "option", "label",
31    ].into_iter().collect();
32
33    let stripped_tags: HashSet<&str> = [
34        "script", "iframe", "object", "embed", "form",
35        "base", "link", "meta", "title",
36        "noscript", "applet", "frame", "frameset",
37        "param",
38    ].into_iter().collect();
39
40    builder.tags(allowed_tags);
41    builder.clean_content_tags(stripped_tags);
42
43    let generic_attrs: HashSet<&str> = [
44        "class", "id", "style", "role", "tabindex", "hidden",
45        "lang", "dir", "title", "rel",
46    ].into_iter().collect();
47    builder.generic_attributes(generic_attrs);
48    builder.link_rel(None);
49
50    let mut tag_attrs: HashMap<&str, HashSet<&str>> = HashMap::new();
51
52    let img_attrs: HashSet<&str> = ["src", "alt", "width", "height"].into_iter().collect();
53    tag_attrs.insert("img", img_attrs);
54
55    let a_attrs: HashSet<&str> = ["href", "target"].into_iter().collect();
56    tag_attrs.insert("a", a_attrs);
57
58    let td_attrs: HashSet<&str> = ["colspan", "rowspan", "scope", "headers"].into_iter().collect();
59    tag_attrs.insert("td", td_attrs.clone());
60    tag_attrs.insert("th", td_attrs);
61
62    let input_attrs: HashSet<&str> = [
63        "type", "value", "placeholder", "name", "for",
64        "checked", "disabled", "readonly", "required",
65        "rows", "cols", "maxlength", "minlength",
66        "min", "max", "step", "pattern",
67    ].into_iter().collect();
68    tag_attrs.insert("input", input_attrs.clone());
69    tag_attrs.insert("textarea", input_attrs.clone());
70    tag_attrs.insert("select", input_attrs.clone());
71    tag_attrs.insert("label", {
72        let mut s = HashSet::new();
73        s.insert("for");
74        s
75    });
76
77    let source_attrs: HashSet<&str> = ["src", "type"].into_iter().collect();
78    tag_attrs.insert("source", source_attrs);
79    tag_attrs.insert("audio", {
80        let mut s = HashSet::new();
81        s.insert("src");
82        s
83    });
84    tag_attrs.insert("video", {
85        let mut s: HashSet<&str> = HashSet::new();
86        s.insert("src");
87        s.insert("width");
88        s.insert("height");
89        s
90    });
91    tag_attrs.insert("track", {
92        let mut s: HashSet<&str> = HashSet::new();
93        s.insert("src");
94        s
95    });
96
97    builder.tag_attributes(tag_attrs);
98
99    builder.generic_attribute_prefixes(["data-", "aria-"].into_iter().collect());
100
101    builder.url_schemes(["http", "https", "mailto", "data"].into_iter().collect());
102
103    builder.strip_comments(true);
104
105    let result = builder.clean(input).to_string();
106
107    sanitize_styles_in_output(&result)
108}
109
110fn sanitize_styles_in_output(html: &str) -> String {
111    let style_block_re = Regex::new(r"(?s)<style>(.*?)</style>").unwrap();
112    let inline_style_re = Regex::new(r#"style="([^"]*)""#).unwrap();
113
114    let result = style_block_re.replace_all(html, |caps: &regex::Captures| {
115        let css = &caps[1];
116        let cleaned = sanitize_css(css);
117        if cleaned.trim().is_empty() {
118            String::new()
119        } else {
120            format!("<style>{}</style>", cleaned)
121        }
122    }).to_string();
123
124    inline_style_re.replace_all(&result, |caps: &regex::Captures| {
125        let style_val = &caps[1];
126        let cleaned = sanitize_style_value(style_val);
127        if cleaned.is_empty() {
128            String::new()
129        } else {
130            format!("style=\"{}\"", cleaned)
131        }
132    }).to_string()
133}
134
135fn sanitize_css(css: &str) -> String {
136    let url_re = Regex::new(r"(?i)url\s*\([^)]*\)").unwrap();
137    let expression_re = Regex::new(r"(?i)expression\s*\([^)]*\)").unwrap();
138    let moz_binding_re = Regex::new(r"(?i)-moz-binding\s*:[^;}\n]*").unwrap();
139    let behavior_re = Regex::new(r"(?i)behavior\s*:[^;}\n]*").unwrap();
140
141    let mut cleaned = css.to_string();
142    cleaned = url_re.replace_all(&cleaned, "").to_string();
143    cleaned = expression_re.replace_all(&cleaned, "").to_string();
144    cleaned = moz_binding_re.replace_all(&cleaned, "").to_string();
145    cleaned = behavior_re.replace_all(&cleaned, "").to_string();
146
147    let block_re = Regex::new(r"\{([^}]*)\}").unwrap();
148    cleaned = block_re.replace_all(&cleaned, |caps: &regex::Captures| {
149        let block = &caps[1];
150        let declarations: Vec<&str> = block
151            .split(';')
152            .map(|d| d.trim())
153            .filter(|d| {
154                if d.is_empty() { return false; }
155                if let Some(colon_idx) = d.find(':') {
156                    let value = d[colon_idx + 1..].trim();
157                    !value.is_empty()
158                } else {
159                    false
160                }
161            })
162            .collect();
163
164        if declarations.is_empty() {
165            "{}".to_string()
166        } else {
167            format!("{{ {}; }}", declarations.join("; "))
168        }
169    }).to_string();
170
171    cleaned
172}
173
174fn sanitize_style_value(style: &str) -> String {
175    let url_re = Regex::new(r"(?i)url\s*\([^)]*\)").unwrap();
176    let expression_re = Regex::new(r"(?i)expression\s*\([^)]*\)").unwrap();
177    let moz_binding_re = Regex::new(r"(?i)-moz-binding\s*:[^;]*").unwrap();
178    let behavior_re = Regex::new(r"(?i)behavior\s*:[^;]*").unwrap();
179
180    let mut cleaned = style.to_string();
181    cleaned = url_re.replace_all(&cleaned, "").to_string();
182    cleaned = expression_re.replace_all(&cleaned, "").to_string();
183    cleaned = moz_binding_re.replace_all(&cleaned, "").to_string();
184    cleaned = behavior_re.replace_all(&cleaned, "").to_string();
185    cleaned.trim().to_string()
186}
187
/// Outcome of rendering one fragment against a set of bindings.
///
/// Derives `PartialEq`/`Eq` so that two renders can be compared by value,
/// for example to detect that nothing changed or in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FragmentResult {
    /// Identifier of the fragment this result was produced for.
    pub fragment_id: String,
    /// Fragment markup after binding substitution.
    pub html: String,
    /// Result of each `data-if` expression found in the markup, keyed by the
    /// expression text.
    pub visibility: HashMap<String, bool>,
}
194
195pub fn process_fragment(
196    fragment_id: &str,
197    sanitized_source: &str,
198    bindings: &HashMap<String, serde_json::Value>,
199) -> FragmentResult {
200    let data_bind_re = Regex::new(
201        r#"(<[^>]*\bdata-bind="([^"]*)"[^>]*>)([\s\S]*?)(</[^>]+>)"#,
202    ).unwrap();
203
204    let self_closing_bind_re = Regex::new(
205        r#"(<[^>]*\bdata-bind="([^"]*)"[^>]*)\s*/>"#,
206    ).unwrap();
207
208    let data_if_re = Regex::new(
209        r#"<[^>]*\bdata-if="([^"]*)"[^>]*>"#,
210    ).unwrap();
211
212    let html = data_bind_re.replace_all(sanitized_source, |caps: &regex::Captures| {
213        let open_tag = &caps[1];
214        let bind_name = &caps[2];
215        let close_tag = &caps[4];
216
217        match bindings.get(bind_name) {
218            Some(val) => {
219                let text = value_to_string(val);
220                format!("{}{}{}", open_tag, text, close_tag)
221            }
222            None => caps[0].to_string(),
223        }
224    }).to_string();
225
226    let html = self_closing_bind_re.replace_all(&html, |caps: &regex::Captures| {
227        let before_close = &caps[1];
228        let bind_name = &caps[2];
229
230        match bindings.get(bind_name) {
231            Some(val) => {
232                let text = value_to_string(val);
233                let value_attr = format!("value=\"{}\"", text);
234                let value_re = Regex::new(r#"value="[^"]*""#).unwrap();
235                if value_re.is_match(before_close) {
236                    let updated = value_re.replace(before_close, value_attr.as_str());
237                    format!("{} />", updated)
238                } else {
239                    format!("{} {} />", before_close, value_attr)
240                }
241            }
242            None => caps[0].to_string(),
243        }
244    }).to_string();
245
246    let mut visibility = HashMap::new();
247    for caps in data_if_re.captures_iter(&html) {
248        let expression = caps[1].to_string();
249        let result = evaluate_condition(&expression, bindings);
250        visibility.insert(expression, result);
251    }
252
253    FragmentResult {
254        fragment_id: fragment_id.to_string(),
255        html,
256        visibility,
257    }
258}
259
260fn value_to_string(val: &serde_json::Value) -> String {
261    match val {
262        serde_json::Value::String(s) => s.clone(),
263        serde_json::Value::Null => String::new(),
264        other => other.to_string(),
265    }
266}
267
268fn evaluate_condition(expression: &str, bindings: &HashMap<String, serde_json::Value>) -> bool {
269    let trimmed = expression.trim();
270
271    if let Some(val) = bindings.get(trimmed) {
272        return is_truthy(val);
273    }
274
275    let lt_re = Regex::new(r"^(\w+)\s*<\s*(\d+(?:\.\d+)?)$").unwrap();
276    if let Some(caps) = lt_re.captures(trimmed) {
277        let var_name = &caps[1];
278        let threshold: f64 = caps[2].parse().unwrap_or(0.0);
279        if let Some(val) = bindings.get(var_name) {
280            if let Some(n) = val.as_f64() {
281                return n < threshold;
282            }
283        }
284        return false;
285    }
286
287    let gt_re = Regex::new(r"^(\w+)\s*>\s*(\d+(?:\.\d+)?)$").unwrap();
288    if let Some(caps) = gt_re.captures(trimmed) {
289        let var_name = &caps[1];
290        let threshold: f64 = caps[2].parse().unwrap_or(0.0);
291        if let Some(val) = bindings.get(var_name) {
292            if let Some(n) = val.as_f64() {
293                return n > threshold;
294            }
295        }
296        return false;
297    }
298
299    let lte_re = Regex::new(r"^(\w+)\s*<=\s*(\d+(?:\.\d+)?)$").unwrap();
300    if let Some(caps) = lte_re.captures(trimmed) {
301        let var_name = &caps[1];
302        let threshold: f64 = caps[2].parse().unwrap_or(0.0);
303        if let Some(val) = bindings.get(var_name) {
304            if let Some(n) = val.as_f64() {
305                return n <= threshold;
306            }
307        }
308        return false;
309    }
310
311    let gte_re = Regex::new(r"^(\w+)\s*>=\s*(\d+(?:\.\d+)?)$").unwrap();
312    if let Some(caps) = gte_re.captures(trimmed) {
313        let var_name = &caps[1];
314        let threshold: f64 = caps[2].parse().unwrap_or(0.0);
315        if let Some(val) = bindings.get(var_name) {
316            if let Some(n) = val.as_f64() {
317                return n >= threshold;
318            }
319        }
320        return false;
321    }
322
323    let eq_re = Regex::new(r"^(\w+)\s*===?\s*(\d+(?:\.\d+)?)$").unwrap();
324    if let Some(caps) = eq_re.captures(trimmed) {
325        let var_name = &caps[1];
326        let threshold: f64 = caps[2].parse().unwrap_or(0.0);
327        if let Some(val) = bindings.get(var_name) {
328            if let Some(n) = val.as_f64() {
329                return (n - threshold).abs() < f64::EPSILON;
330            }
331        }
332        return false;
333    }
334
335    let neq_re = Regex::new(r"^(\w+)\s*!==?\s*(\d+(?:\.\d+)?)$").unwrap();
336    if let Some(caps) = neq_re.captures(trimmed) {
337        let var_name = &caps[1];
338        let threshold: f64 = caps[2].parse().unwrap_or(0.0);
339        if let Some(val) = bindings.get(var_name) {
340            if let Some(n) = val.as_f64() {
341                return (n - threshold).abs() >= f64::EPSILON;
342            }
343        }
344        return false;
345    }
346
347    false
348}
349
350fn is_truthy(val: &serde_json::Value) -> bool {
351    match val {
352        serde_json::Value::Null => false,
353        serde_json::Value::Bool(b) => *b,
354        serde_json::Value::Number(n) => n.as_f64().map_or(false, |v| v != 0.0),
355        serde_json::Value::String(s) => !s.is_empty(),
356        serde_json::Value::Array(_) => true,
357        serde_json::Value::Object(_) => true,
358    }
359}
360
361pub fn resolve_binding_path(data: &serde_json::Value, path: &str) -> serde_json::Value {
362    let parts: Vec<&str> = path.split('.').collect();
363    let mut current = data;
364
365    for part in parts {
366        match current {
367            serde_json::Value::Object(map) => {
368                if let Some(val) = map.get(part) {
369                    current = val;
370                } else {
371                    return serde_json::Value::Null;
372                }
373            }
374            _ => return serde_json::Value::Null,
375        }
376    }
377
378    current.clone()
379}
380
381pub fn resolve_bindings(
382    declarations: &[crate::manifest::FragmentBinding],
383    data: &serde_json::Value,
384) -> HashMap<String, serde_json::Value> {
385    let mut resolved = HashMap::new();
386    for binding in declarations {
387        resolved.insert(
388            binding.name.clone(),
389            resolve_binding_path(data, &binding.path),
390        );
391    }
392    resolved
393}
394
395#[derive(Debug, Clone)]
396pub struct FragmentInstance {
397    pub id: String,
398    pub slot: String,
399    pub format: String,
400    pub priority: i32,
401    pub declaration: FragmentDeclaration,
402    pub sanitized_source: String,
403}
404
405impl FragmentInstance {
406    pub fn get_content(&self, data: &serde_json::Value) -> FragmentResult {
407        let bindings = if let Some(ref decls) = self.declaration.bindings {
408            resolve_bindings(decls, data)
409        } else {
410            HashMap::new()
411        };
412        process_fragment(&self.id, &self.sanitized_source, &bindings)
413    }
414
415    pub fn get_events(&self) -> Vec<crate::manifest::FragmentEvent> {
416        self.declaration.events.clone().unwrap_or_default()
417    }
418}
419
420pub fn create_fragment_instance(
421    declaration: &FragmentDeclaration,
422    source: &str,
423) -> FragmentInstance {
424    let sanitized = sanitize_html(source);
425    FragmentInstance {
426        id: declaration.id.clone(),
427        slot: declaration.slot.clone(),
428        format: declaration.format.clone(),
429        priority: declaration.priority.unwrap_or(0),
430        declaration: declaration.clone(),
431        sanitized_source: sanitized,
432    }
433}
434
435#[derive(Debug)]
436pub struct ModInstance {
437    pub id: String,
438    pub name: String,
439    pub version: String,
440    pub fragments: Vec<FragmentInstance>,
441}
442
443impl ModInstance {
444    pub fn update_bindings(&self, data: &serde_json::Value) -> Vec<FragmentResult> {
445        self.fragments.iter().map(|f| f.get_content(data)).collect()
446    }
447}
448
449static MOD_ID_COUNTER: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
450
451pub fn create_mod_instance(
452    mod_manifest: &ModManifest,
453    fragment_sources: &HashMap<String, String>,
454) -> ModInstance {
455    let counter = MOD_ID_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
456    let id = format!("mod-{}-{}", counter, mod_manifest.name);
457    let mut fragments = Vec::new();
458
459    if let Some(ref decls) = mod_manifest.fragments {
460        for decl in decls {
461            let source = if decl.inline.unwrap_or(false) {
462                decl.source.clone()
463            } else {
464                fragment_sources.get(&decl.source).cloned().unwrap_or_default()
465            };
466            fragments.push(create_fragment_instance(decl, &source));
467        }
468    }
469
470    ModInstance {
471        id,
472        name: mod_manifest.name.clone(),
473        version: mod_manifest.version.clone(),
474        fragments,
475    }
476}
477
478pub fn load_mod(
479    mod_manifest_json: &str,
480    app_manifest: &crate::manifest::Manifest,
481    granted_capabilities: &HashSet<String>,
482    fragment_sources: &HashMap<String, String>,
483) -> Result<ModInstance> {
484    let mod_manifest: ModManifest = serde_json::from_str(mod_manifest_json)
485        .map_err(XriptError::Json)?;
486
487    crate::manifest::validate_mod_manifest(&mod_manifest)?;
488
489    let slots = app_manifest.slots.as_deref().unwrap_or(&[]);
490    let cross_issues = crate::manifest::validate_mod_against_app(
491        &mod_manifest,
492        slots,
493        granted_capabilities,
494    );
495
496    if !cross_issues.is_empty() {
497        return Err(XriptError::ManifestValidation {
498            issues: cross_issues,
499        });
500    }
501
502    Ok(create_mod_instance(&mod_manifest, fragment_sources))
503}