lssg_lib/html/
domtree.rs

1use std::{
2    collections::HashMap,
3    fmt,
4    ops::{Index, IndexMut},
5};
6
7use crate::tree::{Node, Tree, DFS};
8
9use super::Html;
10
/// Payload of a single node in the DOM tree.
#[derive(Debug, Clone)]
pub enum DomNodeKind {
    /// A piece of text content.
    Text { text: String },
    /// An element with its tag name and attribute name/value pairs.
    Element {
        tag: String,
        attributes: HashMap<String, String>,
    },
}
21
22#[derive(Debug, Clone)]
23pub struct DomNode {
24    pub kind: DomNodeKind,
25    children: Vec<usize>,
26    parent: Option<usize>,
27}
28
29impl Node for DomNode {
30    fn children(&self) -> &Vec<usize> {
31        &self.children
32    }
33}
34
/// Identifier of a node inside a `DomTree`: its index in the tree's node list.
pub type DomId = usize;
36
37/// Tree representation of html (DOM).
38///
39/// **Will panic if invalid or removed id's are used.**
40#[derive(Debug, Clone)]
41pub struct DomTree {
42    root: DomId,
43    nodes: Vec<Option<DomNode>>,
44}
45
46impl Tree for DomTree {
47    type Node = DomNode;
48
49    fn root(&self) -> DomId {
50        self.root
51    }
52
53    fn get(&self, id: DomId) -> &DomNode {
54        &self[id]
55    }
56}
57
58impl DomTree {
59    pub fn new() -> DomTree {
60        let mut tree = DomTree {
61            root: 0,
62            nodes: vec![Some(DomNode {
63                kind: DomNodeKind::Element {
64                    tag: "html".to_string(),
65                    attributes: HashMap::new(),
66                },
67                children: vec![],
68                parent: None,
69            })],
70        };
71        tree.add_element(tree.root, "head");
72        tree.add_element(tree.root, "body");
73
74        return tree;
75    }
76
77    pub fn head(&self) -> DomId {
78        return 1;
79    }
80
81    pub fn body(&self) -> DomId {
82        return 2;
83    }
84
85    pub fn get_mut(&mut self, id: DomId) -> &mut DomNode {
86        self.nodes.get_mut(id).unwrap().as_mut().unwrap()
87    }
88
89    /// Get all elements with a certain html tag
90    pub fn get_elements_by_tag_name(&self, tag_name: impl Into<String>) -> Vec<DomId> {
91        let tag_name = tag_name.into();
92        return DFS::new(self)
93            .filter(|id| {
94                if let DomNodeKind::Element { tag, .. } = &self[*id].kind {
95                    if tag == &tag_name {
96                        return true;
97                    }
98                }
99                false
100            })
101            .collect();
102    }
103
104    /// Add parsed html to tree
105    pub fn add_html(&mut self, parent_id: DomId, html: Html) -> Option<usize> {
106        match html {
107            Html::Comment { .. } => None,
108            Html::Text { text } => Some(self.add_text(parent_id, text)),
109            Html::Element {
110                tag,
111                attributes,
112                children,
113            } => {
114                let element = self.add_element_with_attributes(parent_id, tag, attributes);
115                for child in children {
116                    self.add_html(element, child);
117                }
118                Some(element)
119            }
120        }
121    }
122
123    /// Add a node to the tree return the id (index) of the node
124    pub fn add(&mut self, parent_id: DomId, kind: DomNodeKind) -> usize {
125        self.nodes.push(Some(DomNode {
126            kind,
127            children: vec![],
128            parent: Some(parent_id),
129        }));
130        let id = self.nodes.len() - 1;
131        self[parent_id].children.push(id);
132        id
133    }
134
135    /// Add a node to the tree return the id (index) of the node
136    pub fn add_element(&mut self, parent_id: DomId, tag: impl Into<String>) -> usize {
137        self.add(
138            parent_id,
139            DomNodeKind::Element {
140                tag: tag.into(),
141                attributes: HashMap::new(),
142            },
143        )
144    }
145
146    pub fn add_element_with_attributes(
147        &mut self,
148        parent_id: DomId,
149        tag: impl Into<String>,
150        attributes: HashMap<String, String>,
151    ) -> DomId {
152        self.add(
153            parent_id,
154            DomNodeKind::Element {
155                tag: tag.into(),
156                attributes,
157            },
158        )
159    }
160
161    /// Add a node to the tree return the id (index) of the node
162    pub fn add_text(&mut self, parent_id: DomId, text: impl Into<String>) -> usize {
163        self.add(parent_id, DomNodeKind::Text { text: text.into() })
164    }
165
166    /// set a new parent for a node
167    pub fn set_parent(&mut self, id: DomId, new_parent: DomId) {
168        match self[id].parent {
169            Some(parent) => {
170                let parent_node = &mut self[parent];
171                // remove from old parent
172                if let Some(pos) = parent_node.children.iter().position(|c| *c == id) {
173                    parent_node.children.remove(pos);
174                }
175                // add to new parent
176                self[new_parent].children.push(id);
177                self[id].parent = Some(new_parent)
178            }
179            None => panic!("Can't set parent for root node"),
180        }
181    }
182
183    /// Remove a node and connect its children to its parent
184    pub fn remove(&mut self, id: DomId) {
185        let p = self[id].parent.expect("can't remove root");
186        let parent = &mut self[p];
187        // remove node from parent
188        if let Some(pos) = parent.children.iter().position(|c| *c == id) {
189            parent.children.remove(pos);
190        }
191        // add children to node parent
192        let children = self[id].children.clone();
193        for c in children.into_iter() {
194            (&mut self[p]).children.push(c);
195            self[c].parent = Some(p);
196        }
197        self.nodes[id] = None;
198    }
199
200    /// Remove empty tags or invalid html in a way that makes sense
201    pub fn validate(&mut self) {
202        fn validate_recurs(tree: &mut DomTree, id: DomId) {
203            for child in tree[id].children.clone().into_iter() {
204                validate_recurs(tree, child);
205            }
206
207            let node = &tree[id];
208
209            match &node.kind {
210                DomNodeKind::Text { text } => {
211                    if text.len() == 0 {
212                        tree.remove(id);
213                    }
214                }
215                DomNodeKind::Element { tag, .. } => match tag.as_str() {
216                    "p" => {
217                        if node.children().len() == 0 {
218                            tree.remove(id);
219                        }
220                    }
221                    _ => {}
222                },
223            }
224        }
225        validate_recurs(self, self.root());
226    }
227
228    fn to_html_content_recurs(&self, index: DomId) -> String {
229        let node = &self[index];
230        match &node.kind {
231            DomNodeKind::Text { text } => return text.clone(),
232            DomNodeKind::Element { tag, attributes } => {
233                let attributes = attributes
234                    .into_iter()
235                    .map(|(k, v)| {
236                        if v.len() > 0 {
237                            format!(r#"{k}="{v}""#)
238                        } else {
239                            k.into()
240                        }
241                    })
242                    .collect::<Vec<String>>()
243                    .join(" ");
244
245                let spacing = if attributes.len() > 0 {
246                    String::from(" ")
247                } else {
248                    String::new()
249                };
250
251                if node.children.len() == 0 {
252                    match tag.as_str() {
253                        "link" | "meta" => {
254                            return format!("<{tag}{spacing}{}/>", attributes);
255                        }
256                        _ => {}
257                    }
258                }
259
260                let mut content = String::new();
261
262                for c in &node.children {
263                    content += &self.to_html_content_recurs(*c);
264                }
265
266                return format!("<{tag}{spacing}{}>{}</{tag}>", attributes, content);
267            }
268        };
269    }
270
271    pub fn to_html_string(self) -> String {
272        let html = self.to_html_content_recurs(self.root);
273        return format!(r#"<!DOCTYPE html>{html}"#);
274    }
275}
276
277impl fmt::Display for DomTree {
278    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
279        // fill in table
280        let mut row_length = 0;
281        let mut table: Vec<Vec<Option<String>>> = vec![];
282        let mut prev_col = 0;
283        let mut queue = vec![(self.root(), 0)];
284        while let Some((n, col)) = queue.pop() {
285            let node = &self[n];
286            for c in &node.children {
287                queue.push((c.clone(), col + 1))
288            }
289
290            // create col if not exists
291            if let None = table.get(col) {
292                table.push(vec![]);
293            }
294
295            // fill in until we reach the current row where we are
296            let amount_rows_in_col = table[col].len();
297            // if going back fill all the way
298            if prev_col > col {
299                for _ in amount_rows_in_col..row_length {
300                    table[col].push(None);
301                }
302            } else {
303                // if going forward fill to current row - 1
304                for _ in amount_rows_in_col + 1..row_length {
305                    table[col].push(None);
306                }
307            }
308            prev_col = col;
309
310            let name = match &node.kind {
311                DomNodeKind::Text { text, .. } => {
312                    let mut text = text.clone();
313                    text.truncate(10);
314                    if text.len() == 10 {
315                        format!(r#"{text}.."#)
316                    } else {
317                        format!(r#"{text}"#)
318                    }
319                }
320                DomNodeKind::Element { tag: kind, .. } => format!("<{}>", kind.to_owned()),
321            };
322            let node_name = format!("{}({})", name, n);
323            table[col].push(Some(node_name));
324
325            let amount_rows_in_col = table[col].len();
326            // update at what row we are
327            if amount_rows_in_col > row_length {
328                row_length = amount_rows_in_col;
329            }
330        }
331
332        // display table
333        let mut out = vec![String::new(); row_length];
334        for col in 0..table.len() {
335            let max_name_length = table[col]
336                .iter()
337                .map(|c| c.as_ref().map(|c| c.len()).unwrap_or(0))
338                .reduce(|a, b| a.max(b))
339                .unwrap_or(0);
340            for (row, entry) in table[col].iter().enumerate() {
341                match entry {
342                    Some(name) => {
343                        out[row] += name;
344                        out[row] += &" ".repeat(max_name_length - name.len());
345                        if let Some(next_column) = table.get(col + 1) {
346                            if let Some(Some(_)) = next_column.get(row) {
347                                out[row] += &" - ";
348                                continue;
349                            }
350                        }
351                        out[row] += &"   ";
352                    }
353                    None => out[row] += &" ".repeat(max_name_length + 3),
354                }
355            }
356            for row in table[col].len()..row_length {
357                out[row] += &" ".repeat(max_name_length + 3);
358            }
359        }
360
361        f.write_str(&out.join("\n"))?;
362        Ok(())
363    }
364}
365
366// FIXME proc attributes
367// #[macro_export]
368// macro_rules! attributes {
369//     (($x:tt)=($v:tt);*) => {{
370//         let mut attributes = HashMap::new();
371//         attributes.insert($x, $v);
372//         attributes
373//     }};
374// }
375
/// Utility function to convert an iterable of key/value pairs into an
/// attributes hashmap. When a key occurs more than once the last value wins.
pub fn to_attributes<I: IntoIterator<Item = (impl Into<String>, impl Into<String>)>>(
    arr: I,
) -> HashMap<String, String> {
    let mut attributes = HashMap::new();
    for (key, value) in arr {
        attributes.insert(key.into(), value.into());
    }
    attributes
}
382
383impl Index<DomId> for DomTree {
384    type Output = DomNode;
385
386    fn index(&self, index: DomId) -> &Self::Output {
387        self.nodes.get(index).unwrap().as_ref().unwrap()
388    }
389}
390impl IndexMut<DomId> for DomTree {
391    fn index_mut(&mut self, index: DomId) -> &mut Self::Output {
392        self.nodes.get_mut(index).unwrap().as_mut().unwrap()
393    }
394}
395
#[cfg(test)]
mod tests {
    use super::*;

    /// Removing an element empties its slot and hands its children to its parent.
    #[test]
    fn test_remove() {
        let mut dom = DomTree::new();
        let body_id = dom.get_elements_by_tag_name("body")[0];
        let paragraph_id = dom.add_element(body_id, "p");
        let text_id = dom.add_text(paragraph_id, "This is a paragraph");

        dom.remove(paragraph_id);

        let removed_slot = dom.nodes.get(paragraph_id).unwrap();
        assert!(removed_slot.is_none());
        assert_eq!(dom[body_id].children, vec![text_id]);
        assert_eq!(dom[text_id].parent.unwrap(), body_id);
    }
}