oxvg_ast/
visitor.rs

1//! Visitors for traversing and manipulating nodes of an xml document
2use cfg_if::cfg_if;
3#[cfg(feature = "style")]
4use lightningcss::stylesheet;
5
6use crate::{
7    element::Element,
8    node::{self, Node},
9};
10
11#[cfg(feature = "style")]
12use crate::style::{self, ComputedStyles, ElementData};
13
14#[cfg(feature = "selectors")]
15use crate::selectors::Selector;
16
17#[derive(derive_more::Debug, Clone, Default)]
18/// Additional information about the current run of a visitor and it's context
19pub struct Info<'arena, E: Element<'arena>> {
20    /// The path of the file being processed. This should only be used for metadata purposes
21    /// and not for any filesystem requests.
22    pub path: Option<std::path::PathBuf>,
23    /// How many times the document has been processed so far, i.e. when it's processed
24    /// multiple times for further optimisation attempts
25    pub multipass_count: usize,
26    #[debug(skip)]
27    /// The allocator for the parsed file. Used for storing and creating new nodes within
28    /// the document.
29    pub arena: E::Arena,
30}
31
32impl<'arena, E: Element<'arena>> Info<'arena, E> {
33    /// Creates an instance of info with a reference to `arena` that can be used for allocating
34    /// new nodes
35    pub fn new(arena: E::Arena) -> Self {
36        Self {
37            path: None,
38            multipass_count: 0,
39            arena,
40        }
41    }
42}
43
44#[derive(Debug)]
45/// The context struct provides information about the document and it's effects on the visited node
46pub struct Context<'arena, 'i, 'o, E: Element<'arena>> {
47    #[cfg(feature = "style")]
48    /// Uses the style sheet to compute what css properties are applied to the node
49    pub computed_styles: crate::style::ComputedStyles<'i>,
50    #[cfg(feature = "style")]
51    /// A parsed stylesheet for all `<style>` nodes in the document
52    pub stylesheet: Option<lightningcss::stylesheet::StyleSheet<'i, 'o>>,
53    #[cfg(feature = "style")]
54    /// A collection of the inline style and presentation attributes for each element in the document
55    pub element_styles: &'i std::collections::HashMap<E, ElementData<'arena, E>>,
56    /// The root element of the document
57    pub root: E,
58    /// A set of boolean flags about the document and the visited node
59    pub flags: ContextFlags,
60    /// Info about how the program is using the document
61    pub info: &'i Info<'arena, E>,
62    #[cfg(not(feature = "style"))]
63    /// Marker to maintain consistent lifetime with `"style"` feature
64    marker: std::marker::PhantomData<(&'arena (), &'i (), &'o ())>,
65}
66
67impl<'arena, 'i, E: Element<'arena>> Context<'arena, 'i, '_, E> {
68    cfg_if! {
69        if #[cfg(feature = "style")] {
70            /// Instantiates the context with the given fields.
71            ///
72            /// The visitor should update the context as it visits each node.
73            pub fn new(
74                root: E,
75                flags: ContextFlags,
76                element_styles: &'i std::collections::HashMap<E, ElementData<'arena, E>>,
77                info: &'i Info<'arena, E>,
78            ) -> Self {
79                Self {
80                    computed_styles: crate::style::ComputedStyles::default(),
81                    stylesheet: None,
82                    element_styles,
83                    root,
84                    flags,
85                    info,
86                }
87            }
88        } else {
89            /// Instantiates the context with the given fields.
90            ///
91            /// The visitor should update the context as it visits each node.
92            pub fn new(
93                root: E,
94                flags: ContextFlags,
95                info: &'i Info<'arena, E>,
96            ) -> Self {
97                Self {
98                    root,
99                    flags,
100                    info,
101                    marker: std::marker::PhantomData,
102                }
103            }
104        }
105    }
106}
107
108bitflags! {
109    /// A set of flags controlling how a visitor should run following [Visitor::prepare]
110    pub struct PrepareOutcome: usize {
111        /// Nothing of importance to consider following preparation.
112        const none = 0b000_0000_0000;
113        /// The visitor shouldn't run following preparation.
114        const skip = 0b000_0000_0001;
115        #[cfg(feature = "style")]
116        /// Style information should be added to context while visiting
117        const use_style = 0b000_0010;
118    }
119}
120
121impl PrepareOutcome {
122    /// A shorthand to check whether the skip flag is enabled
123    pub fn can_skip(&self) -> bool {
124        self.contains(Self::skip)
125    }
126}
127
128bitflags! {
129    #[derive(Debug, Clone, Default)]
130    /// A set of boolean flags about the document and the visited node
131    pub struct ContextFlags: usize {
132        /// Whether the document has a script element, script href, or on-* attrs
133        const has_script_ref = 0b0001;
134        /// Whether the document has a non-empty stylesheet
135        const has_stylesheet = 0b0010;
136        #[cfg(feature = "style")]
137        /// Whether the computed styles will be used for each element
138        const use_style = 0b0100;
139        /// Whether this element is a `foreignObject` or a child of one
140        const within_foreign_object = 0b1000;
141        /// Whether to skip over the element's children or not
142        const skip_children = 0b1_0000;
143    }
144}
145
146impl ContextFlags {
147    #[cfg(feature = "selectors")]
148    /// Queries whether a `<script>` element is within the document
149    pub fn query_has_script<'arena, E: Element<'arena>>(&mut self, root: &E) {
150        self.set(Self::has_script_ref, has_scripts(root));
151    }
152
153    #[cfg(all(feature = "style", feature = "selectors"))]
154    /// Queries whether a `<style>` element is within the document
155    pub fn query_has_stylesheet<'arena, E: Element<'arena>>(&mut self, root: &E) {
156        self.set(Self::has_stylesheet, !style::root(root).is_empty());
157    }
158
159    /// Prevents the children of the current node from being visited
160    pub fn visit_skip(&mut self) {
161        log::debug!("skipping children");
162        self.set(Self::skip_children, true);
163    }
164}
165
166/// A trait for visiting or transforming the DOM
167#[allow(unused_variables)]
168pub trait Visitor<'arena, E: Element<'arena>> {
169    /// The type of errors which may be produced by the visitor
170    type Error;
171
172    /// Visits the document
173    ///
174    /// # Errors
175    /// Whether the visitor fails
176    fn document(
177        &self,
178        document: &mut E,
179        context: &Context<'arena, '_, '_, E>,
180    ) -> Result<(), Self::Error> {
181        Ok(())
182    }
183
184    /// Exits the document
185    ///
186    /// # Errors
187    /// Whether the visitor fails
188    fn exit_document(
189        &self,
190        document: &mut E,
191        context: &Context<'arena, '_, '_, E>,
192    ) -> Result<(), Self::Error> {
193        Ok(())
194    }
195
196    /// Visits a element
197    ///
198    /// # Errors
199    /// Whether the visitor fails
200    fn element(
201        &self,
202        element: &mut E,
203        context: &mut Context<'arena, '_, '_, E>,
204    ) -> Result<(), Self::Error> {
205        Ok(())
206    }
207
208    /// Exits a element
209    ///
210    /// # Errors
211    /// Whether the visitor fails
212    fn exit_element(
213        &self,
214        element: &mut E,
215        context: &mut Context<'arena, '_, '_, E>,
216    ) -> Result<(), Self::Error> {
217        Ok(())
218    }
219
220    /// Visits the doctype
221    ///
222    /// # Errors
223    /// Whether the visitor fails
224    fn doctype(&self, doctype: &mut <E as Node<'arena>>::Child) -> Result<(), Self::Error> {
225        Ok(())
226    }
227
228    /// Visits a text or cdata node
229    ///
230    /// # Errors
231    /// Whether the visitor fails
232    fn text_or_cdata(&self, node: &mut <E as Node<'arena>>::Child) -> Result<(), Self::Error> {
233        Ok(())
234    }
235
236    /// Visits a comment
237    ///
238    /// # Errors
239    /// Whether the visitor fails
240    fn comment(&self, comment: &mut <E as Node<'arena>>::Child) -> Result<(), Self::Error> {
241        Ok(())
242    }
243
244    /// Visits a processing instruction
245    ///
246    /// # Errors
247    /// Whether the visitor fails
248    fn processing_instruction(
249        &self,
250        processing_instruction: &mut <E as Node<'arena>>::Child,
251        context: &Context<'arena, '_, '_, E>,
252    ) -> Result<(), Self::Error> {
253        Ok(())
254    }
255
256    #[cfg(feature = "style")]
257    /// For implementors, determines whether style information should
258    /// be gathered and added to context prior to visiting an element.
259    fn use_style(&self, element: &E) -> bool {
260        false
261    }
262
263    /// After analysing the document, determines whether any extra features such as
264    /// style parsing or ignoring the tree is needed
265    ///
266    /// # Errors
267    /// Whether the visitor fails
268    fn prepare(
269        &self,
270        document: &E,
271        info: &Info<'arena, E>,
272        context_flags: &mut ContextFlags,
273    ) -> Result<PrepareOutcome, Self::Error> {
274        Ok(PrepareOutcome::none)
275    }
276
277    /// Creates context for root and visits it
278    ///
279    /// # Errors
280    /// If any of the visitor's methods fail
281    fn start(
282        &self,
283        root: &mut E,
284        info: &Info<'arena, E>,
285        flags: Option<ContextFlags>,
286    ) -> Result<PrepareOutcome, Self::Error> {
287        let mut flags = flags.unwrap_or_default();
288        let prepare_outcome = self.prepare(root, info, &mut flags)?;
289        if prepare_outcome.contains(PrepareOutcome::skip) {
290            return Ok(prepare_outcome);
291        }
292        cfg_if! {
293            if #[cfg(feature = "style")] {
294                let element_styles = &mut std::collections::HashMap::new();
295                if prepare_outcome.contains(PrepareOutcome::use_style) {
296                    let style_source = flag_style_source(&mut flags, root);
297                    let stylesheet = parse_stylesheet(style_source.as_str());
298                    *element_styles = ElementData::new(root);
299                    let mut context = Context::new(root.clone(), flags, element_styles, info);
300                    context.stylesheet = stylesheet;
301                    self.visit(root, &mut context)?;
302                } else {
303                    self.visit(
304                        root,
305                        &mut Context::new(root.clone(), flags, element_styles, info),
306                    )?;
307                };
308            } else {
309                self.visit(
310                    root,
311                    &mut Context::new(root.clone(), flags, info),
312                )?;
313            }
314        }
315        Ok(prepare_outcome)
316    }
317
318    /// Visits an element and it's children
319    ///
320    /// # Errors
321    /// If any of the visitor's methods fail
322    fn visit<'i>(
323        &self,
324        element: &mut E,
325        context: &mut Context<'arena, 'i, '_, E>,
326    ) -> Result<(), Self::Error> {
327        match element.node_type() {
328            node::Type::Document => {
329                self.document(element, context)?;
330                self.visit_children(element, context)?;
331                self.exit_document(element, context)
332            }
333            node::Type::Element => {
334                log::debug!("visiting {element:?}");
335                let is_root_foreign_object =
336                    !context.flags.contains(ContextFlags::within_foreign_object)
337                        && element.prefix().is_none()
338                        && element.local_name().as_ref() == "foreignObject";
339                if is_root_foreign_object {
340                    context.flags.set(ContextFlags::within_foreign_object, true);
341                }
342                cfg_if! {
343                    if #[cfg(feature = "style")] {
344                        let use_style = context.flags.contains(ContextFlags::use_style);
345                        if use_style && self.use_style(element) {
346                            context.computed_styles = ComputedStyles::<'i>::default().with_all(
347                                element,
348                                &context.stylesheet,
349                                context.element_styles,
350                            );
351                        } else {
352                            context.computed_styles = ComputedStyles::default();
353                            context.flags.set(ContextFlags::use_style, false);
354                        }
355                        self.element(element, context)?;
356                        context.flags.set(ContextFlags::use_style, use_style);
357                    } else {
358                        self.element(element, context)?;
359                    }
360                }
361                if context.flags.contains(ContextFlags::skip_children) {
362                    context.flags.set(ContextFlags::skip_children, false);
363                } else {
364                    self.visit_children(element, context)?;
365                }
366                log::debug!("left the {element:?}");
367                self.exit_element(element, context)?;
368                if is_root_foreign_object {
369                    context
370                        .flags
371                        .set(ContextFlags::within_foreign_object, false);
372                }
373                Ok(())
374            }
375            _ => Ok(()),
376        }
377    }
378
379    /// Visits the children of an element
380    ///
381    /// # Errors
382    /// If any of the visitor's methods fail
383    fn visit_children(
384        &self,
385        parent: &mut E,
386        context: &mut Context<'arena, '_, '_, E>,
387    ) -> Result<(), Self::Error> {
388        parent
389            .child_nodes_iter()
390            .try_for_each(|mut child| match child.node_type() {
391                node::Type::Document | node::Type::Element => {
392                    if let Some(mut child) = <E as Element>::new(child) {
393                        self.visit(&mut child, context)
394                    } else {
395                        Ok(())
396                    }
397                }
398                node::Type::Text | node::Type::CDataSection => self.text_or_cdata(&mut child),
399                node::Type::Comment => self.comment(&mut child),
400                node::Type::DocumentType => self.doctype(&mut child),
401                node::Type::ProcessingInstruction => {
402                    self.processing_instruction(&mut child, context)
403                }
404                node::Type::Attribute | node::Type::DocumentFragment => Ok(()),
405            })
406    }
407}
408
/// Parses `code` as a css stylesheet using the default parser options.
///
/// Returns `None` when the source can't be parsed. The failure is logged at debug
/// level instead of being dropped silently, so that a stylesheet going missing
/// from the context can be traced back to its cause.
#[cfg(feature = "style")]
fn parse_stylesheet<'i>(code: &'i str) -> Option<stylesheet::StyleSheet<'i, 'i>> {
    match stylesheet::StyleSheet::parse(code, stylesheet::ParserOptions::default()) {
        Ok(parsed) => Some(parsed),
        Err(error) => {
            log::debug!("failed to parse stylesheet: {error}");
            None
        }
    }
}
413
/// Gathers the style source beneath `root` and records what was found in `flags`.
///
/// [`ContextFlags::use_style`] is always switched on, while
/// [`ContextFlags::has_stylesheet`] is set only when the gathered source is non-empty.
/// The gathered source is returned.
#[cfg(feature = "style")]
fn flag_style_source<'arena, E: Element<'arena>>(flags: &mut ContextFlags, root: &E) -> String {
    let source = style::root(root);
    let has_stylesheet = !source.is_empty();
    flags.insert(ContextFlags::use_style);
    flags.set(ContextFlags::has_stylesheet, has_stylesheet);
    source
}
421
/// Reports whether `root` matches a selector covering potential scripting in the
/// document, i.e. any one of the following
///
/// - A `<script>` element
/// - An `<a>` element with a `href` starting with `javascript:`
/// - An element carrying one of the listed event attributes, such as `onbegin`,
///   `onend`, or `onclick`
///
/// # Panics
///
/// If the internal selector fails to build
#[cfg(feature = "selectors")]
pub fn has_scripts<'arena, E: Element<'arena>>(root: &E) -> bool {
    const SCRIPT_SELECTOR: &str = "script,a[href^='javascript:'],[onbegin],[onend],[onrepeat],[onload],[onabort],[onerror],[onresize],[onscroll],[onunload],[onzoom],[oncopy],[oncut],[onpaste],[oncancel],[oncanplay],[oncanplaythrough],[onchange],[onclick],[onclose],[oncuechange],[ondblclick],[ondrag],[ondragend],[ondragenter],[ondragleave],[ondragover],[ondragstart],[ondrop],[ondurationchange],[onemptied],[onended],[onfocus],[oninput],[oninvalid],[onkeydown],[onkeypress],[onkeyup],[onloadeddata],[onloadedmetadata],[onloadstart],[onmousedown],[onmouseenter],[onmouseleave],[onmousemove],[onmouseout],[onmouseup],[onmousewheel],[onpause],[onplay],[onplaying],[onprogress],[onratechange],[onreset],[onseeked],[onseeking],[onselect],[onshow],[onstalled],[onsubmit],[onsuspend],[ontimeupdate],[ontoggle],[onvolumechange],[onwaiting],[onactivate],[onfocusin],[onfocusout],[onmouseover]";

    // PERF: Find a way to lazily evaluate selector
    let selector = Selector::new::<E>(SCRIPT_SELECTOR).expect("known selector");
    // A single match is enough, so only the first result is requested
    root.select_with_selector(selector).next().is_some()
}