rosvgtree/
parse.rs

1use std::collections::HashMap;
2
3use roxmltree::Error;
4
5use crate::{Attribute, AttributeId, Document, ElementId, NodeData, NodeId, NodeKind, ShortRange};
6
7const SVG_NS: &str = "http://www.w3.org/2000/svg";
8const XLINK_NS: &str = "http://www.w3.org/1999/xlink";
9const XML_NAMESPACE_NS: &str = "http://www.w3.org/XML/1998/namespace";
10
11impl<'input> Document<'input> {
12    /// Parses a [`Document`] from a string.
13    pub fn parse_str(text: &'input str) -> Result<Document<'input>, Error> {
14        let xml = roxmltree::Document::parse(text)?;
15        parse(&xml)
16    }
17
18    /// Parses a [`Document`] from a [`roxmltree::Document`].
19    pub fn parse_tree(xml: &roxmltree::Document<'input>) -> Result<Document<'input>, Error> {
20        parse(xml)
21    }
22
23    pub(crate) fn append(&mut self, parent_id: NodeId, kind: NodeKind) -> NodeId {
24        let new_child_id = NodeId::from(self.nodes.len());
25        self.nodes.push(NodeData {
26            parent: Some(parent_id),
27            next_sibling: None,
28            children: None,
29            kind,
30        });
31
32        let last_child_id = self.nodes[parent_id.get_usize()].children.map(|(_, id)| id);
33
34        if let Some(id) = last_child_id {
35            self.nodes[id.get_usize()].next_sibling = Some(new_child_id);
36        }
37
38        self.nodes[parent_id.get_usize()].children = Some(
39            if let Some((first_child_id, _)) = self.nodes[parent_id.get_usize()].children {
40                (first_child_id, new_child_id)
41            } else {
42                (new_child_id, new_child_id)
43            },
44        );
45
46        new_child_id
47    }
48
49    fn append_attribute(&mut self, name: AttributeId, value: roxmltree::StringStorage<'input>) {
50        self.attrs.push(Attribute { name, value });
51    }
52}
53
54fn parse<'input>(xml: &roxmltree::Document<'input>) -> Result<Document<'input>, Error> {
55    let mut doc = Document {
56        nodes: Vec::new(),
57        attrs: Vec::new(),
58        links: HashMap::new(),
59    };
60
61    // Add a root node.
62    doc.nodes.push(NodeData {
63        parent: None,
64        next_sibling: None,
65        children: None,
66        kind: NodeKind::Root,
67    });
68
69    let style_sheet = resolve_css(xml);
70
71    parse_xml_node_children(
72        xml.root(),
73        xml.root(),
74        doc.root().id,
75        &style_sheet,
76        false,
77        0,
78        &mut doc,
79    )?;
80
81    // Check that the root element is `svg`.
82    match doc.root().first_element_child() {
83        Some(child) => {
84            if child.tag_name() != Some(ElementId::Svg) {
85                return Err(roxmltree::Error::NoRootNode);
86            }
87        }
88        None => return Err(roxmltree::Error::NoRootNode),
89    }
90
91    // Collect all elements with `id` attribute.
92    let mut links = HashMap::new();
93    for node in doc.descendants() {
94        if let Some(id) = node.attribute(AttributeId::Id) {
95            links.insert(id.to_string(), node.id);
96        }
97    }
98    doc.links = links;
99
100    fix_recursive_patterns(&mut doc);
101    fix_recursive_links(ElementId::ClipPath, AttributeId::ClipPath, &mut doc);
102    fix_recursive_links(ElementId::Mask, AttributeId::Mask, &mut doc);
103    fix_recursive_links(ElementId::Filter, AttributeId::Filter, &mut doc);
104    fix_recursive_fe_image(&mut doc);
105
106    Ok(doc)
107}
108
109pub(crate) fn parse_tag_name(node: roxmltree::Node) -> Option<ElementId> {
110    if !node.is_element() {
111        return None;
112    }
113
114    if node.tag_name().namespace() != Some(SVG_NS) {
115        return None;
116    }
117
118    ElementId::from_str(node.tag_name().name())
119}
120
121fn parse_xml_node_children<'input>(
122    parent: roxmltree::Node<'_, 'input>,
123    origin: roxmltree::Node,
124    parent_id: NodeId,
125    style_sheet: &simplecss::StyleSheet,
126    ignore_ids: bool,
127    depth: u32,
128    doc: &mut Document<'input>,
129) -> Result<(), Error> {
130    for node in parent.children() {
131        parse_xml_node(node, origin, parent_id, style_sheet, ignore_ids, depth, doc)?;
132    }
133
134    Ok(())
135}
136
137fn parse_xml_node<'input>(
138    node: roxmltree::Node<'_, 'input>,
139    origin: roxmltree::Node,
140    parent_id: NodeId,
141    style_sheet: &simplecss::StyleSheet,
142    ignore_ids: bool,
143    depth: u32,
144    doc: &mut Document<'input>,
145) -> Result<(), Error> {
146    if depth > 1024 {
147        return Err(Error::NodesLimitReached);
148    }
149
150    let mut tag_name = match parse_tag_name(node) {
151        Some(id) => id,
152        None => return Ok(()),
153    };
154
155    if tag_name == ElementId::Style {
156        return Ok(());
157    }
158
159    // TODO: remove?
160    // Treat links as groups.
161    if tag_name == ElementId::A {
162        tag_name = ElementId::G;
163    }
164
165    let node_id = parse_svg_element(node, parent_id, tag_name, style_sheet, ignore_ids, doc)?;
166    if tag_name == ElementId::Text {
167        crate::text::parse_svg_text_element(node, node_id, style_sheet, doc)?;
168    } else if tag_name == ElementId::Use {
169        parse_svg_use_element(node, origin, node_id, style_sheet, depth + 1, doc)?;
170    } else {
171        parse_xml_node_children(
172            node,
173            origin,
174            node_id,
175            style_sheet,
176            ignore_ids,
177            depth + 1,
178            doc,
179        )?;
180    }
181
182    Ok(())
183}
184
185pub(crate) fn parse_svg_element<'input>(
186    xml_node: roxmltree::Node<'_, 'input>,
187    parent_id: NodeId,
188    tag_name: ElementId,
189    style_sheet: &simplecss::StyleSheet,
190    ignore_ids: bool,
191    doc: &mut Document<'input>,
192) -> Result<NodeId, Error> {
193    let attrs_start_idx = doc.attrs.len();
194
195    // Copy presentational attributes first.
196    for attr in xml_node.attributes() {
197        match attr.namespace() {
198            None | Some(SVG_NS) | Some(XLINK_NS) | Some(XML_NAMESPACE_NS) => {}
199            _ => continue,
200        }
201
202        let aid = match AttributeId::from_str(attr.name()) {
203            Some(v) => v,
204            None => continue,
205        };
206
207        // During a `use` resolving, all `id` attributes must be ignored.
208        // Otherwise we will get elements with duplicated id's.
209        if ignore_ids && aid == AttributeId::Id {
210            continue;
211        }
212
213        // For some reason those properties are allowed only inside a `style` attribute and CSS.
214        if matches!(
215            aid,
216            AttributeId::MixBlendMode | AttributeId::Isolation | AttributeId::FontKerning
217        ) {
218            continue;
219        }
220
221        append_attribute(parent_id, tag_name, aid, attr.value_storage().clone(), doc);
222    }
223
224    let mut insert_attribute = |aid, value: &str| {
225        // Check that attribute already exists.
226        let idx = doc.attrs[attrs_start_idx..]
227            .iter_mut()
228            .position(|a| a.name == aid);
229
230        // Append an attribute as usual.
231        let added = append_attribute(
232            parent_id,
233            tag_name,
234            aid,
235            roxmltree::StringStorage::new_owned(value),
236            doc,
237        );
238
239        // Check that attribute was actually added, because it could be skipped.
240        if added {
241            if let Some(idx) = idx {
242                // Swap the last attribute with an existing one.
243                let last_idx = doc.attrs.len() - 1;
244                doc.attrs.swap(attrs_start_idx + idx, last_idx);
245                // Remove last.
246                doc.attrs.pop();
247            }
248        }
249    };
250
251    // Apply CSS.
252    for rule in &style_sheet.rules {
253        if rule.selector.matches(&XmlNode(xml_node)) {
254            for declaration in &rule.declarations {
255                // TODO: perform XML attribute normalization
256                if let Some(aid) = AttributeId::from_str(declaration.name) {
257                    // Parse only the presentation attributes.
258                    if aid.is_presentation() {
259                        insert_attribute(aid, declaration.value);
260                    }
261                } else if declaration.name == "marker" {
262                    insert_attribute(AttributeId::MarkerStart, declaration.value);
263                    insert_attribute(AttributeId::MarkerMid, declaration.value);
264                    insert_attribute(AttributeId::MarkerEnd, declaration.value);
265                }
266            }
267        }
268    }
269
270    // Split a `style` attribute.
271    if let Some(value) = xml_node.attribute("style") {
272        for declaration in simplecss::DeclarationTokenizer::from(value) {
273            // TODO: preform XML attribute normalization
274            if let Some(aid) = AttributeId::from_str(declaration.name) {
275                // Parse only the presentation attributes.
276                if aid.is_presentation() {
277                    insert_attribute(aid, declaration.value);
278                }
279            }
280        }
281    }
282
283    if doc.nodes.len() > 1_000_000 {
284        return Err(Error::NodesLimitReached);
285    }
286
287    let node_id = doc.append(
288        parent_id,
289        NodeKind::Element {
290            tag_name,
291            attributes: ShortRange::new(attrs_start_idx as u32, doc.attrs.len() as u32),
292        },
293    );
294
295    Ok(node_id)
296}
297
298fn append_attribute<'input>(
299    parent_id: NodeId,
300    tag_name: ElementId,
301    aid: AttributeId,
302    value: roxmltree::StringStorage<'input>,
303    doc: &mut Document<'input>,
304) -> bool {
305    match aid {
306        // The `style` attribute will be split into attributes, so we don't need it.
307        AttributeId::Style |
308        // No need to copy a `class` attribute since CSS were already resolved.
309        AttributeId::Class => return false,
310        _ => {}
311    }
312
313    // Ignore `xlink:href` on `tspan` (which was originally `tref` or `a`),
314    // because we will convert `tref` into `tspan` anyway.
315    if tag_name == ElementId::Tspan && aid == AttributeId::Href {
316        return false;
317    }
318
319    if aid.allows_inherit_value() && &*value == "inherit" {
320        return resolve_inherit(parent_id, aid, doc);
321    }
322
323    doc.append_attribute(aid, value);
324    true
325}
326
327fn resolve_inherit(parent_id: NodeId, aid: AttributeId, doc: &mut Document) -> bool {
328    if aid.is_inheritable() {
329        // Inheritable attributes can inherit a value from an any ancestor.
330        let node_id = doc
331            .get(parent_id)
332            .ancestors()
333            .find(|n| n.has_attribute(aid))
334            .map(|n| n.id);
335        if let Some(node_id) = node_id {
336            if let Some(attr) = doc
337                .get(node_id)
338                .attributes()
339                .iter()
340                .find(|a| a.name == aid)
341                .cloned()
342            {
343                doc.attrs.push(Attribute {
344                    name: aid,
345                    value: attr.value,
346                });
347
348                return true;
349            }
350        }
351    } else {
352        // Non-inheritable attributes can inherit a value only from a direct parent.
353        if let Some(attr) = doc
354            .get(parent_id)
355            .attributes()
356            .iter()
357            .find(|a| a.name == aid)
358            .cloned()
359        {
360            doc.attrs.push(Attribute {
361                name: aid,
362                value: attr.value,
363            });
364
365            return true;
366        }
367    }
368
369    // Fallback to a default value if possible.
370    let value = match aid {
371        AttributeId::ImageRendering | AttributeId::ShapeRendering | AttributeId::TextRendering => {
372            "auto"
373        }
374
375        AttributeId::ClipPath
376        | AttributeId::Filter
377        | AttributeId::MarkerEnd
378        | AttributeId::MarkerMid
379        | AttributeId::MarkerStart
380        | AttributeId::Mask
381        | AttributeId::Stroke
382        | AttributeId::StrokeDasharray
383        | AttributeId::TextDecoration => "none",
384
385        AttributeId::FontStretch
386        | AttributeId::FontStyle
387        | AttributeId::FontVariant
388        | AttributeId::FontWeight
389        | AttributeId::LetterSpacing
390        | AttributeId::WordSpacing => "normal",
391
392        AttributeId::Fill | AttributeId::FloodColor | AttributeId::StopColor => "black",
393
394        AttributeId::FillOpacity
395        | AttributeId::FloodOpacity
396        | AttributeId::Opacity
397        | AttributeId::StopOpacity
398        | AttributeId::StrokeOpacity => "1",
399
400        AttributeId::ClipRule | AttributeId::FillRule => "nonzero",
401
402        AttributeId::BaselineShift => "baseline",
403        AttributeId::ColorInterpolationFilters => "linearRGB",
404        AttributeId::Direction => "ltr",
405        AttributeId::Display => "inline",
406        AttributeId::FontSize => "medium",
407        AttributeId::Overflow => "visible",
408        AttributeId::StrokeDashoffset => "0",
409        AttributeId::StrokeLinecap => "butt",
410        AttributeId::StrokeLinejoin => "miter",
411        AttributeId::StrokeMiterlimit => "4",
412        AttributeId::StrokeWidth => "1",
413        AttributeId::TextAnchor => "start",
414        AttributeId::Visibility => "visible",
415        AttributeId::WritingMode => "lr-tb",
416        _ => return false,
417    };
418
419    doc.append_attribute(aid, roxmltree::StringStorage::Borrowed(value));
420    true
421}
422
423fn resolve_href<'a, 'input: 'a>(
424    node: roxmltree::Node<'a, 'input>,
425) -> Option<roxmltree::Node<'a, 'input>> {
426    let link_value = node
427        .attribute((XLINK_NS, "href"))
428        .or_else(|| node.attribute("href"))?;
429
430    let link_id = svgtypes::IRI::from_str(link_value).ok()?.0;
431
432    // We're using `descendants` each time instead of HashTable because
433    // we have to preserve the original elements order.
434    // See tests/svg/e-use-024.svg
435    //
436    // Technically we can use https://crates.io/crates/hashlink,
437    // but this is an additional dependency.
438    // And performance even on huge files is still good enough.
439    node.document()
440        .descendants()
441        .find(|n| n.attribute("id") == Some(link_id))
442}
443
444fn parse_svg_use_element<'input>(
445    node: roxmltree::Node<'_, 'input>,
446    origin: roxmltree::Node,
447    parent_id: NodeId,
448    style_sheet: &simplecss::StyleSheet,
449    depth: u32,
450    doc: &mut Document<'input>,
451) -> Result<(), Error> {
452    let link = match resolve_href(node) {
453        Some(v) => v,
454        None => return Ok(()),
455    };
456
457    if link == node || link == origin {
458        log::warn!(
459            "Recursive 'use' detected. '{}' will be skipped.",
460            node.attribute((SVG_NS, "id")).unwrap_or_default()
461        );
462        return Ok(());
463    }
464
465    // Make sure we're linked to an SVG element.
466    if parse_tag_name(link).is_none() {
467        return Ok(());
468    }
469
470    // Check that none of the linked node's children reference current `use` node
471    // via other `use` node.
472    //
473    // Example:
474    // <g id="g1">
475    //     <use xlink:href="#use1" id="use2"/>
476    // </g>
477    // <use xlink:href="#g1" id="use1"/>
478    //
479    // `use2` should be removed.
480    //
481    // Also, child should not reference its parent:
482    // <g id="g1">
483    //     <use xlink:href="#g1" id="use1"/>
484    // </g>
485    //
486    // `use1` should be removed.
487    let mut is_recursive = false;
488    for link_child in link
489        .descendants()
490        .skip(1)
491        .filter(|n| n.has_tag_name((SVG_NS, "use")))
492    {
493        if let Some(link2) = resolve_href(link_child) {
494            if link2 == node || link2 == link {
495                is_recursive = true;
496                break;
497            }
498        }
499    }
500
501    if is_recursive {
502        log::warn!(
503            "Recursive 'use' detected. '{}' will be skipped.",
504            node.attribute((SVG_NS, "id")).unwrap_or_default()
505        );
506        return Ok(());
507    }
508
509    parse_xml_node(link, node, parent_id, style_sheet, true, depth + 1, doc)
510}
511
512fn resolve_css<'a>(xml: &'a roxmltree::Document<'a>) -> simplecss::StyleSheet<'a> {
513    let mut sheet = simplecss::StyleSheet::new();
514
515    for node in xml.descendants().filter(|n| n.has_tag_name("style")) {
516        match node.attribute("type") {
517            Some("text/css") => {}
518            Some(_) => continue,
519            None => {}
520        }
521
522        let text = match node.text() {
523            Some(v) => v,
524            None => continue,
525        };
526
527        sheet.parse_more(text);
528    }
529
530    sheet
531}
532
533struct XmlNode<'a, 'input: 'a>(roxmltree::Node<'a, 'input>);
534
535impl simplecss::Element for XmlNode<'_, '_> {
536    fn parent_element(&self) -> Option<Self> {
537        self.0.parent_element().map(XmlNode)
538    }
539
540    fn prev_sibling_element(&self) -> Option<Self> {
541        self.0.prev_sibling_element().map(XmlNode)
542    }
543
544    fn has_local_name(&self, local_name: &str) -> bool {
545        self.0.tag_name().name() == local_name
546    }
547
548    fn attribute_matches(&self, local_name: &str, operator: simplecss::AttributeOperator) -> bool {
549        match self.0.attribute(local_name) {
550            Some(value) => operator.matches(value),
551            None => false,
552        }
553    }
554
555    fn pseudo_class_matches(&self, class: simplecss::PseudoClass) -> bool {
556        match class {
557            simplecss::PseudoClass::FirstChild => self.prev_sibling_element().is_none(),
558            // TODO: lang
559            _ => false, // Since we are querying a static SVG we can ignore other pseudo-classes.
560        }
561    }
562}
563
564fn fix_recursive_patterns(doc: &mut Document) {
565    while let Some(node_id) = find_recursive_pattern(AttributeId::Fill, doc) {
566        let idx = doc.get(node_id).attribute_id(AttributeId::Fill).unwrap();
567        doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
568    }
569
570    while let Some(node_id) = find_recursive_pattern(AttributeId::Stroke, doc) {
571        let idx = doc.get(node_id).attribute_id(AttributeId::Stroke).unwrap();
572        doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
573    }
574}
575
576fn find_recursive_pattern(aid: AttributeId, doc: &mut Document) -> Option<NodeId> {
577    for pattern_node in doc
578        .root()
579        .descendants()
580        .filter(|n| n.tag_name() == Some(ElementId::Pattern))
581    {
582        for node in pattern_node.descendants() {
583            let value = match node.attribute(aid) {
584                Some(v) => v,
585                None => continue,
586            };
587
588            if let Ok(svgtypes::Paint::FuncIRI(link_id, _)) = svgtypes::Paint::from_str(value) {
589                if link_id == pattern_node.element_id() {
590                    // If a pattern child has a link to the pattern itself
591                    // then we have to replace it with `none`.
592                    // Otherwise we will get endless loop/recursion and stack overflow.
593                    return Some(node.id);
594                } else {
595                    // Check that linked node children doesn't link this pattern.
596                    if let Some(linked_node) = doc.element_by_id(link_id) {
597                        for node2 in linked_node.descendants() {
598                            let value2 = match node2.attribute(aid) {
599                                Some(v) => v,
600                                None => continue,
601                            };
602
603                            if let Ok(svgtypes::Paint::FuncIRI(link_id2, _)) =
604                                svgtypes::Paint::from_str(value2)
605                            {
606                                if link_id2 == pattern_node.element_id() {
607                                    return Some(node2.id);
608                                }
609                            }
610                        }
611                    }
612                }
613            }
614        }
615    }
616
617    None
618}
619
620fn fix_recursive_links(eid: ElementId, aid: AttributeId, doc: &mut Document) {
621    while let Some(node_id) = find_recursive_link(eid, aid, doc) {
622        let idx = doc.get(node_id).attribute_id(aid).unwrap();
623        doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
624    }
625}
626
627fn find_recursive_link(eid: ElementId, aid: AttributeId, doc: &Document) -> Option<NodeId> {
628    for node in doc
629        .root()
630        .descendants()
631        .filter(|n| n.tag_name() == Some(eid))
632    {
633        for child in node.descendants() {
634            if let Some(link) = child.node_attribute(aid) {
635                if link == node {
636                    // If an element child has a link to the element itself
637                    // then we have to replace it with `none`.
638                    // Otherwise we will get endless loop/recursion and stack overflow.
639                    return Some(child.id);
640                } else {
641                    // Check that linked node children doesn't link this element.
642                    for node2 in link.descendants() {
643                        if let Some(link2) = node2.node_attribute(aid) {
644                            if link2 == node {
645                                return Some(node2.id);
646                            }
647                        }
648                    }
649                }
650            }
651        }
652    }
653
654    None
655}
656
657/// Detects cases like:
658///
659/// ```xml
660/// <filter id="filter1">
661///   <feImage xlink:href="#rect1"/>
662/// </filter>
663/// <rect id="rect1" x="36" y="36" width="120" height="120" fill="green" filter="url(#filter1)"/>
664/// ```
665fn fix_recursive_fe_image(doc: &mut Document) {
666    let mut ids = Vec::new();
667    for fe_node in doc
668        .root()
669        .descendants()
670        .filter(|n| n.tag_name() == Some(ElementId::FeImage))
671    {
672        if let Some(link) = fe_node.node_attribute(AttributeId::Href) {
673            if let Some(filter_uri) = link.attribute(AttributeId::Filter) {
674                let filter_id = fe_node.parent().unwrap().element_id().to_string();
675                for func in svgtypes::FilterValueListParser::from(filter_uri).flatten() {
676                    if let svgtypes::FilterValue::Url(url) = func {
677                        if url == filter_id {
678                            ids.push(link.id);
679                        }
680                    }
681                }
682            }
683        }
684    }
685
686    for id in ids {
687        let idx = doc.get(id).attribute_id(AttributeId::Filter).unwrap();
688        doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
689    }
690}