Skip to main content

merman_render/svg/pipeline/builtin/
attr_sanitize.rs

1use crate::Result;
2use regex::{Captures, Regex};
3use std::borrow::Cow;
4use std::sync::OnceLock;
5
6use super::css_sanitize::strip_css_deg_units;
7use super::util::find_tag_end;
8use crate::svg::pipeline::{SvgPostprocessContext, SvgPostprocessor};
9
10#[derive(Debug, Clone, Copy, Default)]
11pub struct SanitizeSvgAttributesPostprocessor;
12
13impl SvgPostprocessor for SanitizeSvgAttributesPostprocessor {
14    fn name(&self) -> &'static str {
15        "sanitize-svg-attributes"
16    }
17
18    fn process<'a>(
19        &self,
20        svg: Cow<'a, str>,
21        _ctx: &SvgPostprocessContext<'_>,
22    ) -> Result<Cow<'a, str>> {
23        Ok(Cow::Owned(sanitize_element_attributes(&svg)))
24    }
25}
26
27pub(crate) fn sanitize_element_attributes(svg: &str) -> String {
28    let mut out = String::with_capacity(svg.len());
29    let mut cursor = 0;
30
31    while let Some(rel_start) = svg[cursor..].find('<') {
32        let start = cursor + rel_start;
33        out.push_str(&svg[cursor..start]);
34
35        let Some(end) = find_tag_end(svg, start) else {
36            out.push_str(&svg[start..]);
37            return out;
38        };
39
40        let tag = &svg[start..=end];
41        if is_bad_rect_tag(tag) {
42            cursor = if tag.trim_end().ends_with("/>") {
43                end + 1
44            } else {
45                svg[end + 1..]
46                    .find("</rect>")
47                    .map(|rel_close| end + 1 + rel_close + "</rect>".len())
48                    .unwrap_or(end + 1)
49            };
50            continue;
51        }
52        out.push_str(&sanitize_tag_attributes(tag));
53        cursor = end + 1;
54    }
55
56    out.push_str(&svg[cursor..]);
57    out
58}
59
60fn sanitize_tag_attributes(tag: &str) -> Cow<'_, str> {
61    if tag.starts_with("</")
62        || tag.starts_with("<!--")
63        || tag.starts_with("<!")
64        || tag.starts_with("<?")
65    {
66        return Cow::Borrowed(tag);
67    }
68
69    static ATTR_RE: OnceLock<Regex> = OnceLock::new();
70    let attr_re = ATTR_RE.get_or_init(|| {
71        Regex::new(r#"\s+([A-Za-z_:][-A-Za-z0-9_:.]*)\s*=\s*"([^"]*)""#)
72            .expect("valid SVG attribute regex")
73    });
74
75    let mut changed = false;
76    let result = attr_re
77        .replace_all(tag, |caps: &Captures<'_>| {
78            let full = &caps[0];
79            let name = &caps[1];
80            let value = &caps[2];
81
82            if should_drop_attribute(name, value) {
83                changed = true;
84                return String::new();
85            }
86
87            if let Some(value) = normalize_px_attribute(name, value) {
88                changed = true;
89                return format!(r#" {name}="{value}""#);
90            }
91
92            if name.eq_ignore_ascii_case("style") {
93                let sanitized = sanitize_style_attribute(value);
94                if sanitized.trim().is_empty() {
95                    changed = true;
96                    return String::new();
97                }
98                if sanitized != value {
99                    changed = true;
100                    return format!(r#" style="{sanitized}""#);
101                }
102            }
103
104            full.to_string()
105        })
106        .into_owned();
107
108    if changed {
109        Cow::Owned(result)
110    } else {
111        Cow::Borrowed(tag)
112    }
113}
114
115fn should_drop_attribute(name: &str, value: &str) -> bool {
116    if name.eq_ignore_ascii_case("style") {
117        return false;
118    }
119
120    let normalized = name.to_ascii_lowercase();
121    let guarded = matches!(
122        normalized.as_str(),
123        "fill"
124            | "stroke"
125            | "width"
126            | "height"
127            | "x"
128            | "y"
129            | "x1"
130            | "x2"
131            | "y1"
132            | "y2"
133            | "r"
134            | "cx"
135            | "cy"
136            | "rx"
137            | "ry"
138            | "stroke-width"
139            | "transform"
140            | "d"
141            | "points"
142    );
143
144    guarded && is_invalid_svg_value(value)
145}
146
/// Strips a trailing `px` unit from a length attribute.
///
/// Returns `Some(number)` when `name` is one of the length attributes in
/// `LENGTH_ATTRS` (compared ASCII case-insensitively) and the trimmed value is
/// a finite number followed by `px`. Returns `None` in every other case, which
/// means the attribute should be left alone.
fn normalize_px_attribute(name: &str, value: &str) -> Option<String> {
    const LENGTH_ATTRS: &[&str] = &[
        "width",
        "height",
        "x",
        "y",
        "x1",
        "x2",
        "y1",
        "y2",
        "r",
        "cx",
        "cy",
        "rx",
        "ry",
        "stroke-width",
    ];

    let is_length = LENGTH_ATTRS
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate));
    if !is_length {
        return None;
    }

    let number = value.trim().strip_suffix("px")?.trim();
    // Only rewrite when what remains is an ordinary finite number.
    number
        .parse::<f64>()
        .ok()
        .filter(|parsed| parsed.is_finite())
        .map(|_| number.to_string())
}
178
/// Reports whether `tag` opens (or is a self-closing instance of) the element
/// named `expected`.
///
/// Leading whitespace is ignored. Closing tags and `<!--` comments never
/// match. The element name is the text after `<` up to the first whitespace,
/// `/` or `>`, and it is compared ASCII case-insensitively.
fn is_start_or_empty_tag(tag: &str, expected: &str) -> bool {
    let Some(rest) = tag.trim_start().strip_prefix('<') else {
        return false;
    };
    // `</…` is a closing tag and `<!--…` a comment.
    if rest.starts_with('/') || rest.starts_with("!--") {
        return false;
    }

    let name_len = rest
        .find(|ch: char| ch.is_whitespace() || ch == '/' || ch == '>')
        .unwrap_or(rest.len());
    rest[..name_len].eq_ignore_ascii_case(expected)
}
191
192fn attr_value(tag: &str, name: &str) -> Option<String> {
193    static ATTR_RE: OnceLock<Regex> = OnceLock::new();
194    let attr_re = ATTR_RE.get_or_init(|| {
195        Regex::new(r#"\s+([A-Za-z_:][-A-Za-z0-9_:.]*)\s*=\s*"([^"]*)""#)
196            .expect("valid SVG attribute regex")
197    });
198
199    for caps in attr_re.captures_iter(tag) {
200        if caps[1].eq_ignore_ascii_case(name) {
201            return Some(caps[2].to_string());
202        }
203    }
204    None
205}
206
207fn is_missing_or_invalid_rect_dimension(value: Option<&str>) -> bool {
208    let Some(value) = value.map(str::trim) else {
209        return true;
210    };
211    if value.is_empty() {
212        return true;
213    }
214    if let Ok(n) = value.parse::<f64>() {
215        return !n.is_finite() || n <= 0.0;
216    }
217    false
218}
219
220fn is_bad_rect_tag(tag: &str) -> bool {
221    if !is_start_or_empty_tag(tag, "rect") {
222        return false;
223    }
224
225    let width = attr_value(tag, "width");
226    let height = attr_value(tag, "height");
227    is_missing_or_invalid_rect_dimension(width.as_deref())
228        || is_missing_or_invalid_rect_dimension(height.as_deref())
229}
230
231fn sanitize_style_attribute(value: &str) -> String {
232    let mut out = Vec::new();
233
234    for decl in value.split(';') {
235        let trimmed = decl.trim();
236        if trimmed.is_empty() {
237            continue;
238        }
239
240        let Some((property, raw_value)) = trimmed.split_once(':') else {
241            if is_invalid_svg_value(trimmed) {
242                continue;
243            }
244            out.push(strip_css_deg_units(trimmed));
245            continue;
246        };
247
248        let property = property.trim();
249        let value = raw_value.trim();
250        if value.is_empty() || is_invalid_svg_value(value) {
251            continue;
252        }
253        if property
254            .trim()
255            .to_ascii_lowercase()
256            .starts_with("animation")
257        {
258            continue;
259        }
260
261        out.push(format!("{property}:{}", strip_css_deg_units(value)));
262    }
263
264    out.join(";")
265}
266
/// Reports whether an attribute or declaration value looks broken.
///
/// A value is invalid when it is blank, or when it contains `nan`,
/// `undefined` or `infinity` anywhere in it, ignoring ASCII case. The check is
/// a plain substring search, so the markers also match inside longer words.
fn is_invalid_svg_value(value: &str) -> bool {
    const MARKERS: [&str; 3] = ["nan", "undefined", "infinity"];

    let candidate = value.trim();
    if candidate.is_empty() {
        return true;
    }

    let folded = candidate.to_ascii_lowercase();
    MARKERS.iter().any(|marker| folded.contains(*marker))
}
276
#[cfg(test)]
mod tests {
    use super::sanitize_element_attributes;

    /// Bare (colon-less) style declarations that are invalid must be removed
    /// while the valid declaration between them survives.
    #[test]
    fn sanitize_style_attribute_drops_invalid_bare_declarations() {
        let out = sanitize_element_attributes(
            r#"<svg><path style="undefined; stroke: #333; undefined"/></svg>"#,
        );

        assert!(!out.contains("undefined"), "got: {out}");
        assert!(out.contains(r#"style="stroke:#333""#), "got: {out}");
    }

    /// Rects with a missing, zero or NaN dimension are removed together with
    /// their content; a well-formed rect is left untouched.
    #[test]
    fn sanitize_element_attributes_drops_rects_without_positive_dimensions() {
        let out = sanitize_element_attributes(
            r#"<svg><rect/><rect width="0" height="10"/><rect width="12" height="8"/><g><rect width="NaN" height="10"><title>bad</title></rect></g></svg>"#,
        );

        let removed = ["<rect/>", r#"width="0""#, "NaN", "<title>bad</title>"];
        for fragment in removed.iter() {
            assert!(!out.contains(*fragment), "got: {out}");
        }
        assert!(
            out.contains(r#"<rect width="12" height="8"/>"#),
            "got: {out}"
        );
    }
}