parse_html/parser/
ast.rs

1use crate::{
2    node::{ElementNode, Node},
3    token::Token,
4};
5
6use super::{error::ParserError, parser_trait::ParserTrait};
7
8pub struct Parser {
9    tokens: Vec<Token>,
10    position: usize,
11}
12
13impl Parser {
14    fn next_token(&mut self) -> Option<Token> {
15        if self.position < self.tokens.len() {
16            let token = self.tokens[self.position].clone();
17            self.position += 1;
18            Some(token)
19        } else {
20            None
21        }
22    }
23
24    fn read_attributes(&mut self) -> Vec<(String, String)> {
25        let mut attributes = Vec::new();
26
27        while let Some(token) = self.next_token() {
28            match token {
29                Token::AttributeName(attr_name) => {
30                    if let Some(Token::AttributeValue(attr_value)) = self.next_token() {
31                        attributes.push((attr_name, attr_value));
32                    } else {
33                        attributes.push((attr_name, "true".to_string()));
34                        self.position -= 1;
35                    }
36                }
37                _ => {
38                    self.position -= 1;
39                    break;
40                }
41            }
42        }
43
44        attributes
45    }
46
47    fn parse_element(&mut self, tag_name: String) -> Result<ElementNode, ParserError> {
48        let attributes = self.read_attributes();
49        let mut children = Vec::new();
50
51        while let Some(token) = self.next_token() {
52            match token {
53                Token::TagClose(ref close_tag) if *close_tag == tag_name => {
54                    return Ok(ElementNode {
55                        tag_name,
56                        attributes,
57                        children,
58                    });
59                }
60                Token::TagClose(close_tag) => {
61                    return Err(ParserError::UnexpectedClosingTag(tag_name, close_tag));
62                }
63                Token::TagOpen(tag) => {
64                    children.push(Node::Element(self.parse_element(tag)?));
65                }
66                Token::SelfClosingTag(tag) => {
67                    children.push(Node::Element(ElementNode {
68                        tag_name: tag,
69                        attributes: self.read_attributes(),
70                        children: vec![],
71                    }));
72                }
73                Token::Text(text) => {
74                    children.push(Node::Text(text));
75                }
76                _ => {}
77            }
78        }
79
80        Err(ParserError::UnexpectedClosingTag(tag_name, "".to_string()))
81    }
82}
83
84impl ParserTrait for Parser {
85    fn new(tokens: Vec<Token>) -> Self {
86        Self {
87            tokens,
88            position: 0,
89        }
90    }
91
92    fn parse(&mut self) -> Result<Vec<Node>, ParserError> {
93        let mut nodes = Vec::new();
94        while let Some(token) = self.next_token() {
95            match token {
96                Token::TagOpen(tag) => {
97                    nodes.push(Node::Element(self.parse_element(tag)?));
98                }
99                Token::SelfClosingTag(tag) => {
100                    nodes.push(Node::Element(ElementNode {
101                        tag_name: tag,
102                        attributes: self.read_attributes(),
103                        children: vec![],
104                    }));
105                }
106                Token::Text(text) => {
107                    nodes.push(Node::Text(text));
108                }
109                Token::TagClose(close_tag) => {
110                    return Err(ParserError::UnexpectedClosingTag(close_tag, "".to_string()));
111                }
112                _ => {}
113            }
114        }
115        Ok(nodes)
116    }
117}
118
#[cfg(test)]
mod tests {
    use super::*;
    use crate::token::Token;

    // Token builders: keep the fixtures below short enough to read at a glance.

    /// Builds a `Token::TagOpen` for `tag`.
    fn open(tag: &str) -> Token {
        Token::TagOpen(tag.to_string())
    }

    /// Builds a `Token::TagClose` for `tag`.
    fn close(tag: &str) -> Token {
        Token::TagClose(tag.to_string())
    }

    /// Builds a `Token::SelfClosingTag` for `tag`.
    fn self_closing(tag: &str) -> Token {
        Token::SelfClosingTag(tag.to_string())
    }

    /// Builds a `Token::Text` holding `content`.
    fn text(content: &str) -> Token {
        Token::Text(content.to_string())
    }

    /// Builds a `Token::AttributeName` for `name`.
    fn attr_name(name: &str) -> Token {
        Token::AttributeName(name.to_string())
    }

    /// Builds a `Token::AttributeValue` for `value`.
    fn attr_value(value: &str) -> Token {
        Token::AttributeValue(value.to_string())
    }

    // Expected-tree builders.

    /// Builds an element node with the given tag, attributes and children.
    fn element(tag: &str, attributes: &[(&str, &str)], children: Vec<Node>) -> Node {
        Node::Element(ElementNode {
            tag_name: tag.to_string(),
            attributes: attributes
                .iter()
                .map(|(name, value)| (name.to_string(), value.to_string()))
                .collect(),
            children,
        })
    }

    /// Builds a text node holding `content`.
    fn text_node(content: &str) -> Node {
        Node::Text(content.to_string())
    }

    /// Runs a fresh parser over `tokens`.
    fn parse_tokens(tokens: Vec<Token>) -> Result<Vec<Node>, ParserError> {
        Parser::new(tokens).parse()
    }

    #[test]
    fn test_parse_single_element() {
        let result = parse_tokens(vec![open("div"), close("div")]);

        let expected = vec![element("div", &[], vec![])];

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), expected);
    }

    #[test]
    fn test_parse_element_with_text() {
        let result = parse_tokens(vec![open("p"), text("Hello, world!"), close("p")]);

        let expected = vec![element("p", &[], vec![text_node("Hello, world!")])];

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), expected);
    }

    #[test]
    fn test_parse_nested_elements() {
        let result = parse_tokens(vec![
            open("div"),
            open("p"),
            text("Nested"),
            close("p"),
            close("div"),
        ]);

        let expected = vec![element(
            "div",
            &[],
            vec![element("p", &[], vec![text_node("Nested")])],
        )];

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), expected);
    }

    #[test]
    fn test_parse_self_closing_tag() {
        let result = parse_tokens(vec![self_closing("img")]);

        let expected = vec![element("img", &[], vec![])];

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), expected);
    }

    #[test]
    fn test_parse_element_with_attributes() {
        let result = parse_tokens(vec![
            self_closing("input"),
            attr_name("type"),
            attr_value("text"),
            self_closing("input"),
        ]);

        let expected = vec![
            element("input", &[("type", "text")], vec![]),
            element("input", &[], vec![]),
        ];

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), expected);
    }

    #[test]
    fn test_parse_deeply_nested_elements() {
        let result = parse_tokens(vec![
            open("div"),
            open("section"),
            open("article"),
            text("Deep content"),
            close("article"),
            close("section"),
            close("div"),
        ]);

        let expected = vec![element(
            "div",
            &[],
            vec![element(
                "section",
                &[],
                vec![element("article", &[], vec![text_node("Deep content")])],
            )],
        )];

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), expected);
    }

    #[test]
    fn test_unclosed_div() {
        // The outer `div` is never closed, so parsing must fail.
        let result = parse_tokens(vec![
            open("div"),
            open("section"),
            open("article"),
            text("Deep content"),
            close("article"),
            close("section"),
        ]);

        assert!(result.is_err());
    }

    #[test]
    fn test_mismatched_closing_tag() {
        match parse_tokens(vec![open("div"), close("p")]) {
            Err(ParserError::UnexpectedClosingTag(enclosing, found)) => {
                assert_eq!(enclosing, "div");
                assert_eq!(found, "p");
            }
            other => panic!("expected UnexpectedClosingTag, got {:?}", other),
        }
    }

    #[test]
    fn test_stray_closing_tag_at_top_level() {
        let result = parse_tokens(vec![text("before"), close("div")]);

        assert!(result.is_err());
    }

    #[test]
    fn test_parse_complex_attributes() {
        // `disabled` has no value token, so it is expected to default to "true".
        let result = parse_tokens(vec![
            open("button"),
            attr_name("class"),
            attr_value("btn primary"),
            attr_name("disabled"),
            close("button"),
        ]);

        let expected = vec![element(
            "button",
            &[("class", "btn primary"), ("disabled", "true")],
            vec![],
        )];

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), expected);
    }

    #[test]
    fn test_parse_complex_html_document() {
        let result = parse_tokens(vec![
            open("html"),
            attr_name("lang"),
            attr_value("en"),
            open("head"),
            self_closing("meta"),
            attr_name("charset"),
            attr_value("UTF-8"),
            self_closing("meta"),
            attr_name("name"),
            attr_value("viewport"),
            attr_name("content"),
            attr_value("width=device-width, initial-scale=1.0"),
            open("title"),
            text("Document"),
            close("title"),
            close("head"),
            open("body"),
            text("ok ceci est un texte"),
            open("a"),
            attr_name("href"),
            attr_value("2"),
            text("link1"),
            close("a"),
            open("a"),
            attr_name("href"),
            attr_value("1"),
            text("link2"),
            close("a"),
            open("form"),
            attr_name("action"),
            attr_value("d"),
            attr_name("method"),
            attr_value("get"),
            self_closing("input"),
            attr_name("type"),
            attr_value("text"),
            attr_name("name"),
            attr_value("name"),
            close("form"),
            close("body"),
            close("html"),
        ]);

        let head = element(
            "head",
            &[],
            vec![
                element("meta", &[("charset", "UTF-8")], vec![]),
                element(
                    "meta",
                    &[
                        ("name", "viewport"),
                        ("content", "width=device-width, initial-scale=1.0"),
                    ],
                    vec![],
                ),
                element("title", &[], vec![text_node("Document")]),
            ],
        );

        let body = element(
            "body",
            &[],
            vec![
                text_node("ok ceci est un texte"),
                element("a", &[("href", "2")], vec![text_node("link1")]),
                element("a", &[("href", "1")], vec![text_node("link2")]),
                element(
                    "form",
                    &[("action", "d"), ("method", "get")],
                    vec![element(
                        "input",
                        &[("type", "text"), ("name", "name")],
                        vec![],
                    )],
                ),
            ],
        );

        let expected = vec![element("html", &[("lang", "en")], vec![head, body])];

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), expected);
    }
}