note_mark/model/
html.rs

1//! HTML document model.
2//!
//! This module contains the data structures used to represent an HTML document.
4
5use std::borrow::Cow;
6
7/// The struct to represent an root HTML document.
8#[derive(Debug, Clone, PartialEq, Eq)]
9pub struct DocumentNode<'a> {
10    pub root: Vec<Node<'a>>,
11}
12
/// Tag of an HTML element.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ElementTag {
    /// `<div>`
    Div,
    /// `<span>`
    Span,
    /// `<p>`
    P,
    /// `<h1>`
    H1,
    /// `<h2>`
    H2,
    /// `<h3>`
    H3,
    /// `<h4>`
    H4,
    /// `<h5>`
    H5,
    /// `<h6>`
    H6,
    /// `<ul>`
    Ul,
    /// `<ol>`
    Ol,
    /// `<li>`
    Li,
    /// `<blockquote>`
    Blockquote,
    /// `<a>`
    A,
    /// `<strong>`
    Strong,
    /// `<em>`
    Em,
    /// `<br>`
    Br,
}
34
35impl ElementTag {
36    /// Whether this tag is a block item.
37    pub fn is_block_item(&self) -> bool {
38        matches!(
39            self,
40            ElementTag::Div
41                | ElementTag::P
42                | ElementTag::Ul
43                | ElementTag::Ol
44                | ElementTag::Li
45                | ElementTag::H1
46                | ElementTag::H2
47                | ElementTag::H3
48                | ElementTag::H4
49                | ElementTag::H5
50                | ElementTag::H6
51        )
52    }
53}
54
55impl ElementTag {
56    /// Create a new ElementTag from a headline level.
57    pub fn headline(level: u8) -> Option<Self> {
58        match level {
59            1 => Some(Self::H1),
60            2 => Some(Self::H2),
61            3 => Some(Self::H3),
62            4 => Some(Self::H4),
63            5 => Some(Self::H5),
64            6 => Some(Self::H6),
65            _ => None,
66        }
67    }
68
69    /// Return headline level if this tag is a headline.
70    pub fn get_headline_level(&self) -> Option<u8> {
71        match self {
72            Self::H1 => Some(1),
73            Self::H2 => Some(2),
74            Self::H3 => Some(3),
75            Self::H4 => Some(4),
76            Self::H5 => Some(5),
77            Self::H6 => Some(6),
78            _ => None,
79        }
80    }
81}
82
83/// The enum to represent an HTML node.
84#[derive(Debug, Clone, PartialEq, Eq)]
85pub enum Node<'a> {
86    Element(ElementNode<'a>),
87    Text(TextNode<'a>),
88}
89
90impl Node<'_> {
91    /// Whether this node is a block item.
92    pub fn is_block_item(&self) -> bool {
93        match self {
94            Node::Element(element) => element.tag.is_block_item(),
95            Node::Text(_) => false,
96        }
97    }
98}
99
100/// Stringify a node.
101pub fn get_text(nodes: &[Node<'_>]) -> String {
102    nodes
103        .iter()
104        .map(|node| match node {
105            Node::Element(element) => get_text(&element.children),
106            Node::Text(text) => text.text.to_string(),
107        })
108        .collect::<Vec<_>>()
109        .join("")
110}
111
112/// The struct to represent an HTML element node.
113#[derive(Debug, Clone, PartialEq, Eq)]
114pub struct ElementNode<'a> {
115    /// The tag of this element.
116    pub tag: ElementTag,
117    /// The id of this element.
118    pub id: Vec<String>,
119    /// The classes of this element.
120    pub class: Vec<String>,
121    /// The href of this element.
122    pub href: Option<String>,
123    /// The attributes of this element.
124    pub attrs: Vec<(String, String)>,
125    /// The children of this element.
126    pub children: Vec<Node<'a>>,
127}
128
129impl Default for ElementNode<'_> {
130    fn default() -> Self {
131        Self {
132            tag: ElementTag::Div,
133            id: vec![],
134            class: vec![],
135            href: None,
136            attrs: vec![],
137            children: vec![],
138        }
139    }
140}
141
/// An HTML text node.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextNode<'a> {
    /// Text of this node, either borrowed for `'a` or owned.
    pub text: Cow<'a, str>,
}