pochoir_parser/tree/
mod.rs

1#![allow(clippy::panic)]
2
3use pochoir_common::Spanned;
4use self_cell::self_cell;
5use std::{
6    collections::BTreeMap,
7    ops::{Add, AddAssign},
8    path::{Path, PathBuf},
9};
10
11use crate::{Node, ParsedNode};
12
13mod selection;
14mod traverse;
15mod tree_ref;
16
17pub use selection::build_selector_list;
18pub use tree_ref::{TreeRef, TreeRefMut};
19
20self_cell! {
21    /// An owned version of a [`Tree`] containing the HTML data used to build it.
22    ///
23    /// This structure uses [self_cell](https://docs.rs/self_cell) to build a self-referencing structure without a lifetime.
24    pub struct OwnedTree {
25        owner: String,
26
27        #[covariant]
28        dependent: Tree,
29    }
30
31    impl { Debug }
32}
33
34impl OwnedTree {
35    /// Get the inner HTML of the tree.
36    pub fn get_data(&self) -> &str {
37        self.borrow_owner()
38    }
39
40    /// Get an immutable reference to the inner tree.
41    pub fn get_tree(&self) -> &Tree<'_> {
42        self.borrow_dependent()
43    }
44
45    /// Mutate the tree temporarily by executing the given function.
46    pub fn mutate<Return>(&mut self, func: impl FnOnce(&mut Tree) -> Return) -> Return {
47        self.with_dependent_mut(|_, tree| func(tree))
48    }
49}
50
51#[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Hash)]
52pub enum TreeRefId {
53    Root,
54    Node(usize),
55}
56
57impl Add for TreeRefId {
58    type Output = Self;
59
60    fn add(self, rhs: Self) -> Self::Output {
61        match (self, rhs) {
62            (_, Self::Root) | (Self::Root, _) => Self::Root,
63            (Self::Node(a), Self::Node(b)) => Self::Node(a + b),
64        }
65    }
66}
67
68impl AddAssign<Self> for TreeRefId {
69    fn add_assign(&mut self, rhs: Self) {
70        match (self, rhs) {
71            (Self::Root | Self::Node(_), Self::Root) | (Self::Root, Self::Node(_)) => (),
72            (Self::Node(a), Self::Node(b)) => *a += b,
73        }
74    }
75}
76
77impl Add<usize> for TreeRefId {
78    type Output = Self;
79
80    fn add(self, rhs: usize) -> Self::Output {
81        match self {
82            Self::Root => Self::Root,
83            Self::Node(a) => Self::Node(a + rhs),
84        }
85    }
86}
87
88impl AddAssign<usize> for TreeRefId {
89    fn add_assign(&mut self, rhs: usize) {
90        match self {
91            Self::Root => (),
92            Self::Node(a) => *a += rhs,
93        }
94    }
95}
96
97#[derive(Debug, Clone)]
98struct TreeNode<'a> {
99    data: ParsedNode<'a>,
100    parent: TreeRefId,
101    children: Vec<TreeRefId>,
102}
103
104#[derive(Debug, Clone)]
105pub struct Tree<'a> {
106    file_path: PathBuf,
107    nodes: BTreeMap<TreeRefId, TreeNode<'a>>,
108    next_id: TreeRefId,
109}
110
111impl<'a> Tree<'a> {
112    /// Create an empty [`Tree`].
113    pub fn new<P: AsRef<Path>>(file_path: P) -> Self {
114        Self {
115            file_path: file_path.as_ref().into(),
116            nodes: BTreeMap::from_iter([(
117                TreeRefId::Root,
118                TreeNode {
119                    data: Spanned::new(Node::Root),
120                    parent: TreeRefId::Root,
121                    children: vec![],
122                },
123            )]),
124            next_id: TreeRefId::Node(0),
125        }
126    }
127
128    pub fn file_path(&self) -> &Path {
129        &self.file_path
130    }
131
132    pub fn next_id(&self) -> TreeRefId {
133        self.next_id
134    }
135
136    /// Insert a new node in the tree as a child of `parent`.
137    ///
138    /// # Panics
139    ///
140    /// This function panics if the provided parent ID does not exist in the tree. It may happen if
141    /// the ID is wrong, does not exist in the tree or if the node having this ID was removed.
142    pub fn insert(&mut self, parent: TreeRefId, data: ParsedNode<'a>) -> TreeRefId {
143        let id = self.next_id;
144        self.nodes.insert(
145            id,
146            TreeNode {
147                data,
148                parent,
149                children: vec![],
150            },
151        );
152
153        // Update parent
154        self.nodes
155            .get_mut(&parent)
156            .expect("failed to find parent node in tree")
157            .children
158            .push(id);
159
160        self.next_id += 1;
161        id
162    }
163
164    fn insert_with_id(
165        &mut self,
166        id: TreeRefId,
167        parent: TreeRefId,
168        children: Vec<TreeRefId>,
169        data: ParsedNode<'a>,
170    ) {
171        self.nodes.insert(
172            id,
173            TreeNode {
174                data,
175                parent,
176                children,
177            },
178        );
179    }
180
181    /// Get the list of all the tree nodes.
182    ///
183    /// Removed nodes won't be in the list.
184    pub fn all_nodes(&self) -> Vec<TreeRefId> {
185        self.nodes.iter().map(|n| *n.0).collect()
186    }
187
188    /// Get the list of the tree root nodes.
189    pub fn root_nodes(&self) -> Vec<TreeRefId> {
190        self.nodes
191            .iter()
192            .filter(|n| *n.0 != TreeRefId::Root && n.1.parent == TreeRefId::Root)
193            .map(|n| *n.0)
194            .collect()
195    }
196
197    /// Get a reference to a tree node or panic if the ID does not exist.
198    ///
199    /// For a non-panicking version, see [`Tree::try_get`].
200    ///
201    /// # Panics
202    ///
203    /// This function panics if the provided ID does not exist in the tree. It may happen if the
204    /// node ID is hard-coded and is not yet present in the tree. Keep in mind that removing a node
205    /// consumes the reference so this function is very unlikely to panic.
206    pub fn get(&self, id: TreeRefId) -> TreeRef<'a, '_> {
207        self.try_get(id).unwrap_or_else(|| {
208            panic!("failed to get a reference to the tree node with ID {id:?}");
209        })
210    }
211
212    /// Get a mutable reference to a tree node or panic if the ID does not exist.
213    ///
214    /// For a non-panicking version, see [`Tree::try_get_mut`].
215    ///
216    /// # Panics
217    ///
218    /// This function panics if the provided ID does not exist in the tree. It may happen if the
219    /// node ID is hard-coded and is not yet present in the tree. Keep in mind that removing a node
220    /// consumes the reference so this function is very unlikely to panic.
221    pub fn get_mut(&mut self, id: TreeRefId) -> TreeRefMut<'a, '_> {
222        self.try_get_mut(id).unwrap_or_else(|| {
223            panic!("failed to get a mutable reference to the tree node with ID {id:?}");
224        })
225    }
226
227    /// Try to get a tree reference from a node ID.
228    ///
229    /// Returns `None` if the ID is not present in tree.
230    pub fn try_get(&self, id: TreeRefId) -> Option<TreeRef<'a, '_>> {
231        if self.nodes.contains_key(&id) {
232            Some(TreeRef { id, tree: self })
233        } else {
234            None
235        }
236    }
237
238    /// Try to get a mutable tree reference from a node ID.
239    ///
240    /// Returns `None` if the ID is not present in tree.
241    pub fn try_get_mut(&mut self, id: TreeRefId) -> Option<TreeRefMut<'a, '_>> {
242        if self.nodes.contains_key(&id) {
243            Some(TreeRefMut { id, tree: self })
244        } else {
245            None
246        }
247    }
248
249    pub fn traverse_breadth(&self) -> impl Iterator<Item = TreeRef<'a, '_>> {
250        let parent = self.get(TreeRefId::Root);
251
252        traverse::TraverseBreadthIter {
253            children: parent.children().collect(),
254            index: 0,
255        }
256    }
257
258    pub fn traverse_depth(&self) -> impl Iterator<Item = TreeRef<'a, '_>> {
259        let mut queue: Vec<TreeRef> = self.get(TreeRefId::Root).children().collect();
260        queue.reverse();
261
262        traverse::TraverseDepthIter { queue }
263    }
264
265    /// Search the first node matching the CSS selector in the tree, relatively to this node.
266    ///
267    /// The traversal order used is the same as in the [official DOM specification](https://dom.spec.whatwg.org/#concept-tree-order)
268    /// ie preorder, depth-first.
269    ///
270    /// It is similar to the `document.querySelector` function in JavaScript.
271    ///
272    /// # Errors
273    ///
274    /// Returns [`selectors::parser::SelectorParseError`] if the CSS selector is not a valid CSS selector
275    /// or `Ok(None)` if no node matching the CSS selector is found in the tree
276    pub fn select<'b>(
277        &self,
278        selector: &'b str,
279    ) -> Result<Option<TreeRefId>, crate::error::SelectorParseError<'b>> {
280        let selector_list =
281            build_selector_list(selector)?;
282        Ok(self.traverse_depth()
283            .find(|tree_ref| tree_ref.is_matching(&selector_list))
284            .map(|tree_ref| tree_ref.id()))
285    }
286
287    /// Select several nodes in the tree using a CSS selector.
288    ///
289    /// It is similar to the `document.querySelectorAll` function in JavaScript.
290    ///
291    /// Returns an empty `Vec` if the selector is not a valid CSS selector.
292    pub fn select_all(&self, selector: &str) -> Vec<TreeRefId> {
293        if let Ok(selector_list) = build_selector_list(selector) {
294            self.traverse_depth()
295                .filter(|tree_ref| tree_ref.is_matching(&selector_list))
296                .map(|tree_ref| tree_ref.id())
297                .collect()
298        } else {
299            vec![]
300        }
301    }
302}
303
304#[cfg(test)]
305#[allow(clippy::similar_names)]
306mod tests {
307    use pochoir_template_engine::TemplateBlock;
308    use std::borrow::Cow;
309
310    use crate::Attrs;
311
312    use super::*;
313
314    #[test]
315    #[allow(clippy::similar_names)]
316    fn make_tree() {
317        let mut tree = Tree::new("index.html");
318
319        let container_node = Node::Element(Cow::Borrowed("div"), Attrs::new());
320        let text_node = Node::TemplateBlock(TemplateBlock::RawText(Cow::Borrowed("Hello world!")));
321        let text2_node =
322            Node::TemplateBlock(TemplateBlock::RawText(Cow::Borrowed("Hello world 2!")));
323        let link_node = Node::Element(
324            Cow::Borrowed("a"),
325            Attrs::from_iter([(
326                Cow::Borrowed("href"),
327                vec![Spanned::new(TemplateBlock::text("https://example.com"))],
328            )]),
329        );
330
331        let container_id = tree.insert(TreeRefId::Root, Spanned::new(container_node.clone()));
332        let text_id = tree.insert(container_id, Spanned::new(text_node.clone()));
333        let text2_id = tree.insert(container_id, Spanned::new(text2_node.clone()));
334        let link_id = tree.insert(container_id, Spanned::new(link_node.clone()));
335
336        let container = tree.get(container_id);
337        let text = tree.get(text_id); // Test relations
338        let text2 = tree.get(text2_id);
339        let link = tree.get(link_id);
340
341        // Test data
342        assert_eq!(*container.data(), container_node);
343        assert_eq!(*text.data(), text_node);
344        assert_eq!(*text2.data(), text2_node);
345        assert_eq!(*link.data(), link_node);
346
347        // Test relations
348        assert_eq!(container.parent(), tree.get(TreeRefId::Root));
349        assert_eq!(text.parent(), tree.get(container_id));
350        assert_eq!(text2.parent(), tree.get(container_id));
351        assert_eq!(link.parent(), tree.get(container_id));
352
353        assert_eq!(container.prev_sibling(), None);
354        assert_eq!(text.prev_sibling(), None);
355        assert_eq!(text2.prev_sibling(), Some(tree.get(text_id)));
356        assert_eq!(link.prev_sibling(), Some(tree.get(text2_id)));
357
358        assert_eq!(container.next_sibling(), None);
359        assert_eq!(text.next_sibling(), Some(tree.get(text2_id)));
360        assert_eq!(text2.next_sibling(), Some(tree.get(link_id)));
361        assert_eq!(link.next_sibling(), None);
362
363        assert_eq!(container.children().collect::<Vec<TreeRef>>(), vec![text, text2, link]);
364        assert_eq!(text.children().collect::<Vec<TreeRef>>(), vec![]);
365        assert_eq!(text2.children().collect::<Vec<TreeRef>>(), vec![]);
366        assert_eq!(link.children().collect::<Vec<TreeRef>>(), vec![]);
367    }
368
369    #[test]
370    fn traverse_tree() {
371        let mut tree = Tree::new("index.html");
372
373        let container_node = Node::Element(Cow::Borrowed("div"), Attrs::new());
374        let text_node = Node::TemplateBlock(TemplateBlock::RawText(Cow::Borrowed("Hello world!")));
375        let text2_node =
376            Node::TemplateBlock(TemplateBlock::RawText(Cow::Borrowed("Hello world 2!")));
377        let link_node = Node::Element(
378            Cow::Borrowed("a"),
379            Attrs::from_iter([(
380                Cow::Borrowed("href"),
381                vec![Spanned::new(TemplateBlock::text("https://example.com"))],
382            )]),
383        );
384
385        let container_id = tree.insert(TreeRefId::Root, Spanned::new(container_node.clone()));
386        let link_id = tree.insert(container_id, Spanned::new(link_node.clone()));
387        let text_id = tree.insert(container_id, Spanned::new(text_node.clone()));
388        let text2_id = tree.insert(link_id, Spanned::new(text2_node.clone()));
389
390        let ids: Vec<TreeRefId> = tree
391            .traverse_breadth()
392            .map(|tree_ref| tree_ref.id())
393            .collect();
394        assert_eq!(ids, vec![container_id, link_id, text_id, text2_id]);
395
396        let ids: Vec<TreeRefId> = tree
397            .traverse_depth()
398            .map(|tree_ref| tree_ref.id())
399            .collect();
400        assert_eq!(ids, vec![container_id, link_id, text2_id, text_id]);
401    }
402
403    #[test]
404    fn select() {
405        let mut tree = Tree::new("index.html");
406
407        let container_node = Node::Element(Cow::Borrowed("div"), Attrs::new());
408        let link_node = Node::Element(
409            Cow::Borrowed("a"),
410            Attrs::from_iter([
411                (
412                    Cow::Borrowed("href"),
413                    vec![Spanned::new(TemplateBlock::text("https://example.com"))],
414                ),
415                (
416                    Cow::Borrowed("class"),
417                    vec![Spanned::new(TemplateBlock::text("link"))],
418                ),
419            ]),
420        );
421
422        let container_id = tree.insert(TreeRefId::Root, Spanned::new(container_node.clone()));
423        let link_id = tree.insert(container_id, Spanned::new(link_node.clone()));
424        let container_cloned_id = tree.insert(container_id, Spanned::new(container_node.clone()));
425        let link_cloned_id = tree.insert(container_cloned_id, Spanned::new(link_node.clone()));
426
427        let container_cloned = tree.get(container_cloned_id);
428        let link_cloned = tree.get(link_cloned_id);
429
430        assert_eq!(tree.select("a.link").unwrap(), Some(link_id));
431        assert_eq!(tree.select("div").unwrap(), Some(container_id));
432        assert_eq!(tree.select("div > div").unwrap(), Some(container_cloned_id));
433        assert_eq!(container_cloned.select(".link").unwrap(), Some(link_cloned));
434        assert_eq!(
435            tree.get(tree.select("div > div").unwrap().unwrap()).select(".link").unwrap(),
436            Some(link_cloned)
437        );
438        assert_eq!(tree.select_all(".link"), vec![link_id, link_cloned_id]);
439    }
440
441    #[test]
442    fn update_tree() {
443        let mut tree = Tree::new("index.html");
444
445        let container_node = Node::Element(Cow::Borrowed("div"), Attrs::new());
446        let link_node = Node::Element(
447            Cow::Borrowed("a"),
448            Attrs::from_iter([
449                (
450                    Cow::Borrowed("href"),
451                    vec![Spanned::new(TemplateBlock::text("https://example.com"))],
452                ),
453                (
454                    Cow::Borrowed("class"),
455                    vec![Spanned::new(TemplateBlock::text("link"))],
456                ),
457            ]),
458        );
459
460        let container_id = tree.insert(TreeRefId::Root, Spanned::new(container_node.clone()));
461        let link_id = tree.insert(container_id, Spanned::new(link_node.clone()));
462        let container_cloned_id = tree.insert(container_id, Spanned::new(container_node.clone()));
463        let link_cloned_id = tree.insert(container_cloned_id, Spanned::new(link_node.clone()));
464
465        tree.get_mut(container_cloned_id).remove();
466
467        assert_eq!(tree.try_get(container_cloned_id), None);
468        assert_eq!(tree.try_get(link_cloned_id), None);
469        assert_eq!(tree.get(container_id).children().collect::<Vec<TreeRef>>(), vec![tree.get(link_id)]);
470
471        let container2_id = tree.insert(container_id, Spanned::new(container_node));
472        let _link2_id = tree.insert(container2_id, Spanned::new(link_node));
473
474        let container = tree.get(container_id);
475
476        let removed_nodes: Vec<TreeRefId> = container
477            .traverse_depth()
478            .filter(|tree_ref| tree_ref.attr("class") == Ok(Some("link".to_string())))
479            .map(|tree_ref| tree_ref.id())
480            .collect();
481
482        for id in removed_nodes {
483            tree.get_mut(id).remove();
484        }
485
486        assert_eq!(tree.get(container_id).traverse_breadth().filter(|tree_ref| matches!(&tree_ref.data(), Node::Element(_, attrs) if attrs.get("class") == Some(&vec![Spanned::new(TemplateBlock::text("link"))]))).count(), 0);
487    }
488
489    #[test]
490    fn sub_tree() {
491        let mut tree = Tree::new("index.html");
492
493        let container_node = Node::Element(Cow::Borrowed("div"), Attrs::new());
494        let text_node = Node::TemplateBlock(TemplateBlock::RawText(Cow::Borrowed("Hello world!")));
495        let text2_node =
496            Node::TemplateBlock(TemplateBlock::RawText(Cow::Borrowed("Hello world 2!")));
497        let link_node = Node::Element(
498            Cow::Borrowed("a"),
499            Attrs::from_iter([(
500                Cow::Borrowed("href"),
501                vec![Spanned::new(TemplateBlock::text("https://example.com"))],
502            )]),
503        );
504
505        let container_id = tree.insert(TreeRefId::Root, Spanned::new(container_node.clone()));
506        let text_id = tree.insert(container_id, Spanned::new(text_node.clone()));
507        let link_id = tree.insert(container_id, Spanned::new(link_node.clone()));
508        let text2_id = tree.insert(link_id, Spanned::new(text2_node.clone()));
509
510        let link = tree.get(link_id);
511        let sub_tree = link.sub_tree();
512        assert_eq!(sub_tree.try_get(container_id), None);
513        assert_eq!(sub_tree.try_get(text_id), None);
514        assert_eq!(sub_tree.get(text2_id).data(), &text2_node);
515    }
516}