merman_render/svg/pipeline/builtin/
attr_sanitize.rs1use crate::Result;
2use regex::{Captures, Regex};
3use std::borrow::Cow;
4use std::sync::OnceLock;
5
6use super::css_sanitize::strip_css_deg_units;
7use super::util::find_tag_end;
8use crate::svg::pipeline::{SvgPostprocessContext, SvgPostprocessor};
9
10#[derive(Debug, Clone, Copy, Default)]
11pub struct SanitizeSvgAttributesPostprocessor;
12
13impl SvgPostprocessor for SanitizeSvgAttributesPostprocessor {
14 fn name(&self) -> &'static str {
15 "sanitize-svg-attributes"
16 }
17
18 fn process<'a>(
19 &self,
20 svg: Cow<'a, str>,
21 _ctx: &SvgPostprocessContext<'_>,
22 ) -> Result<Cow<'a, str>> {
23 Ok(Cow::Owned(sanitize_element_attributes(&svg)))
24 }
25}
26
27pub(crate) fn sanitize_element_attributes(svg: &str) -> String {
28 let mut out = String::with_capacity(svg.len());
29 let mut cursor = 0;
30
31 while let Some(rel_start) = svg[cursor..].find('<') {
32 let start = cursor + rel_start;
33 out.push_str(&svg[cursor..start]);
34
35 let Some(end) = find_tag_end(svg, start) else {
36 out.push_str(&svg[start..]);
37 return out;
38 };
39
40 let tag = &svg[start..=end];
41 if is_bad_rect_tag(tag) {
42 cursor = if tag.trim_end().ends_with("/>") {
43 end + 1
44 } else {
45 svg[end + 1..]
46 .find("</rect>")
47 .map(|rel_close| end + 1 + rel_close + "</rect>".len())
48 .unwrap_or(end + 1)
49 };
50 continue;
51 }
52 out.push_str(&sanitize_tag_attributes(tag));
53 cursor = end + 1;
54 }
55
56 out.push_str(&svg[cursor..]);
57 out
58}
59
60fn sanitize_tag_attributes(tag: &str) -> Cow<'_, str> {
61 if tag.starts_with("</")
62 || tag.starts_with("<!--")
63 || tag.starts_with("<!")
64 || tag.starts_with("<?")
65 {
66 return Cow::Borrowed(tag);
67 }
68
69 static ATTR_RE: OnceLock<Regex> = OnceLock::new();
70 let attr_re = ATTR_RE.get_or_init(|| {
71 Regex::new(r#"\s+([A-Za-z_:][-A-Za-z0-9_:.]*)\s*=\s*"([^"]*)""#)
72 .expect("valid SVG attribute regex")
73 });
74
75 let mut changed = false;
76 let result = attr_re
77 .replace_all(tag, |caps: &Captures<'_>| {
78 let full = &caps[0];
79 let name = &caps[1];
80 let value = &caps[2];
81
82 if should_drop_attribute(name, value) {
83 changed = true;
84 return String::new();
85 }
86
87 if let Some(value) = normalize_px_attribute(name, value) {
88 changed = true;
89 return format!(r#" {name}="{value}""#);
90 }
91
92 if name.eq_ignore_ascii_case("style") {
93 let sanitized = sanitize_style_attribute(value);
94 if sanitized.trim().is_empty() {
95 changed = true;
96 return String::new();
97 }
98 if sanitized != value {
99 changed = true;
100 return format!(r#" style="{sanitized}""#);
101 }
102 }
103
104 full.to_string()
105 })
106 .into_owned();
107
108 if changed {
109 Cow::Owned(result)
110 } else {
111 Cow::Borrowed(tag)
112 }
113}
114
115fn should_drop_attribute(name: &str, value: &str) -> bool {
116 if name.eq_ignore_ascii_case("style") {
117 return false;
118 }
119
120 let normalized = name.to_ascii_lowercase();
121 let guarded = matches!(
122 normalized.as_str(),
123 "fill"
124 | "stroke"
125 | "width"
126 | "height"
127 | "x"
128 | "y"
129 | "x1"
130 | "x2"
131 | "y1"
132 | "y2"
133 | "r"
134 | "cx"
135 | "cy"
136 | "rx"
137 | "ry"
138 | "stroke-width"
139 | "transform"
140 | "d"
141 | "points"
142 );
143
144 guarded && is_invalid_svg_value(value)
145}
146
/// Strips a trailing `px` unit from the value of a length-like attribute.
///
/// Returns `Some(number)` only when `name` is one of the listed geometry attributes
/// (compared ASCII-case-insensitively), the trimmed value ends in `px`, and what
/// precedes the unit parses as a finite `f64`. The numeric text is returned as
/// written, not re-formatted. Returns `None` in every other case.
fn normalize_px_attribute(name: &str, value: &str) -> Option<String> {
    const PX_ATTRIBUTES: &[&str] = &[
        "width",
        "height",
        "x",
        "y",
        "x1",
        "x2",
        "y1",
        "y2",
        "r",
        "cx",
        "cy",
        "rx",
        "ry",
        "stroke-width",
    ];

    let is_guarded = PX_ATTRIBUTES
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate));
    if !is_guarded {
        return None;
    }

    let number = value.trim().strip_suffix("px")?.trim();
    match number.parse::<f64>() {
        Ok(parsed) if parsed.is_finite() => Some(number.to_owned()),
        _ => None,
    }
}
178
/// Reports whether `tag` is an opening or self-closing tag named `expected`.
///
/// Leading whitespace is ignored. Closing tags (`</...`) and comments (`<!--`) never
/// match. The element name runs from just after `<` up to the first whitespace, `/`
/// or `>` and is compared ASCII-case-insensitively.
fn is_start_or_empty_tag(tag: &str, expected: &str) -> bool {
    let Some(rest) = tag.trim_start().strip_prefix('<') else {
        return false;
    };
    if rest.starts_with('/') || rest.starts_with("!--") {
        return false;
    }

    let name_len = rest
        .find(|ch: char| ch.is_whitespace() || ch == '/' || ch == '>')
        .unwrap_or(rest.len());
    rest[..name_len].eq_ignore_ascii_case(expected)
}
191
192fn attr_value(tag: &str, name: &str) -> Option<String> {
193 static ATTR_RE: OnceLock<Regex> = OnceLock::new();
194 let attr_re = ATTR_RE.get_or_init(|| {
195 Regex::new(r#"\s+([A-Za-z_:][-A-Za-z0-9_:.]*)\s*=\s*"([^"]*)""#)
196 .expect("valid SVG attribute regex")
197 });
198
199 for caps in attr_re.captures_iter(tag) {
200 if caps[1].eq_ignore_ascii_case(name) {
201 return Some(caps[2].to_string());
202 }
203 }
204 None
205}
206
/// Reports whether a `<rect>` `width`/`height` value makes the rect unusable.
///
/// A dimension is rejected when it is:
/// - absent (`None`) or blank;
/// - numeric, optionally followed by a `px` unit, and not finite or not strictly
///   positive (`0`, `-3`, `0px`, `NaN`, `NaNpx`, ...);
/// - non-numeric text containing `nan`, `undefined` or `infinity` in any ASCII case.
///
/// Any other non-numeric value (for example `100%` or `2em`) is accepted.
///
/// The `px` and marker cases mirror what the attribute sanitizer does afterwards:
/// it rewrites `0px` to `0` and removes a `width="undefined"` attribute, either of
/// which would leave behind a rect without a positive dimension.
fn is_missing_or_invalid_rect_dimension(value: Option<&str>) -> bool {
    let Some(value) = value.map(str::trim) else {
        return true;
    };
    if value.is_empty() {
        return true;
    }

    // Judge `12px` like `12`; values without the unit are parsed as written.
    let number = value.strip_suffix("px").map_or(value, str::trim_end);
    if let Ok(n) = number.parse::<f64>() {
        return !n.is_finite() || n <= 0.0;
    }

    // Non-numeric text: reject only the well-known "broken computation" markers.
    let lower = value.to_ascii_lowercase();
    ["nan", "undefined", "infinity"]
        .iter()
        .any(|marker| lower.contains(*marker))
}
219
220fn is_bad_rect_tag(tag: &str) -> bool {
221 if !is_start_or_empty_tag(tag, "rect") {
222 return false;
223 }
224
225 let width = attr_value(tag, "width");
226 let height = attr_value(tag, "height");
227 is_missing_or_invalid_rect_dimension(width.as_deref())
228 || is_missing_or_invalid_rect_dimension(height.as_deref())
229}
230
231fn sanitize_style_attribute(value: &str) -> String {
232 let mut out = Vec::new();
233
234 for decl in value.split(';') {
235 let trimmed = decl.trim();
236 if trimmed.is_empty() {
237 continue;
238 }
239
240 let Some((property, raw_value)) = trimmed.split_once(':') else {
241 if is_invalid_svg_value(trimmed) {
242 continue;
243 }
244 out.push(strip_css_deg_units(trimmed));
245 continue;
246 };
247
248 let property = property.trim();
249 let value = raw_value.trim();
250 if value.is_empty() || is_invalid_svg_value(value) {
251 continue;
252 }
253 if property
254 .trim()
255 .to_ascii_lowercase()
256 .starts_with("animation")
257 {
258 continue;
259 }
260
261 out.push(format!("{property}:{}", strip_css_deg_units(value)));
262 }
263
264 out.join(";")
265}
266
/// Reports whether `value` is blank or contains `nan`, `undefined` or `infinity`
/// anywhere in it, compared ASCII-case-insensitively.
///
/// This is a plain substring test, so a marker embedded in a longer word also counts.
fn is_invalid_svg_value(value: &str) -> bool {
    const INVALID_MARKERS: [&str; 3] = ["nan", "undefined", "infinity"];

    let lower = value.trim().to_ascii_lowercase();
    lower.is_empty() || INVALID_MARKERS.iter().any(|marker| lower.contains(*marker))
}
276
#[cfg(test)]
mod tests {
    use super::sanitize_element_attributes;

    /// Bare `undefined` declarations are removed from a `style` attribute while the
    /// valid `stroke` declaration survives with its whitespace collapsed.
    #[test]
    fn sanitize_style_attribute_drops_invalid_bare_declarations() {
        let out = sanitize_element_attributes(
            r#"<svg><path style="undefined; stroke: #333; undefined"/></svg>"#,
        );

        assert!(!out.contains("undefined"), "got: {out}");
        assert!(out.contains(r#"style="stroke:#333""#), "got: {out}");
    }

    /// Rects with missing, zero or NaN dimensions are removed (children included),
    /// while a rect with positive dimensions is kept verbatim.
    #[test]
    fn sanitize_element_attributes_drops_rects_without_positive_dimensions() {
        let svg = r#"<svg><rect/><rect width="0" height="10"/><rect width="12" height="8"/><g><rect width="NaN" height="10"><title>bad</title></rect></g></svg>"#;
        let out = sanitize_element_attributes(svg);

        let removed_fragments = ["<rect/>", r#"width="0""#, "NaN", "<title>bad</title>"];
        for fragment in removed_fragments {
            assert!(!out.contains(fragment), "got: {out}");
        }
        assert!(
            out.contains(r#"<rect width="12" height="8"/>"#),
            "got: {out}"
        );
    }
}
304}