1use std::{collections::HashMap, io::Read};
2
3use crate::{char_reader::CharReader, parse_error::ParseError};
4
/// Parses a format-string literal as HTML and evaluates to its first top-level node.
///
/// The single token tree is forwarded to `format!`, so inline format arguments
/// are interpolated before parsing.
///
/// # Panics
///
/// Panics with "should contain valid html" when parsing returns an error, and
/// with "has to contain valid html" when parsing yields no nodes at all.
#[macro_export]
macro_rules! html {
    ($x:tt) => {
        $crate::html::parse_html(format!($x).as_bytes())
            .expect("should contain valid html")
            .into_iter()
            .next()
            .unwrap_or_else(|| panic!("has to contain valid html"))
    };
}
16
17pub fn parse_html(input: impl Read) -> Result<Vec<Html>, ParseError> {
18 let mut reader = CharReader::new(input);
19
20 let mut tokens = vec![];
21
22 loop {
23 match read_token(&mut reader)? {
24 None => break,
25 Some(t) => tokens.push(t),
26 }
27 }
28
29 let mut reduced_tokens = vec![];
31 for token in tokens.into_iter() {
32 if let Some(Html::Text { text: a }) = reduced_tokens.last_mut() {
33 if let Html::Text { text: b } = &token {
34 *a += b;
35 continue;
36 }
37 }
38 reduced_tokens.push(token)
39 }
40
41 Ok(reduced_tokens)
42}
43
44pub fn parse_html_block(
48 reader: &mut CharReader<impl Read>,
49) -> Result<Option<(String, HashMap<String, String>, String)>, ParseError> {
50 if let Some('<') = reader.peek_char(0)? {
51 if let Some(start_tag) = reader.peek_until(|c| c == '>')? {
52 let mut tag = String::new();
54 for c in start_tag[1..start_tag.len() - 1].chars() {
55 match c {
56 ' ' => break,
57 '\n' => break,
58 _ => tag.push(c),
59 }
60 }
61
62 let mut attributes = HashMap::new();
64 let chars: Vec<char> = start_tag[1 + tag.len()..start_tag.len() - 1]
65 .chars()
66 .collect();
67 let mut key = String::new();
68 let mut value = String::new();
69 let mut in_value = false;
70 let mut i = 0;
71 while i < chars.len() {
72 match chars[i] {
73 ' ' if in_value == false => {
74 if key.len() > 0 {
75 attributes.insert(key, value);
76 key = String::new();
77 value = String::new();
78 in_value = false;
79 }
80 }
81 '=' => match chars.get(i + 1) {
82 Some('"') | Some('\'') => {
83 i += 1;
84 in_value = true
85 }
86 _ => {}
87 },
88 '\'' | '"' if in_value => in_value = false,
89 c => {
90 if in_value {
91 value.push(c)
92 } else {
93 key.push(c)
94 }
95 }
96 }
97 i += 1;
98 }
99 if key.len() > 0 {
100 attributes.insert(key, value);
101 }
102
103 let end_tag = format!("</{tag}>");
104 if let Some(html_block) = reader.peek_until_match_inclusive(&end_tag)? {
105 reader.consume(start_tag.len())?;
106 let mut content = reader.consume_string(html_block.len() - start_tag.len())?;
107 content.truncate(content.len() - end_tag.len());
108
109 let mut children = vec![];
110 let mut reader = CharReader::new(content.as_bytes());
111 while let Some(html) = read_token(&mut reader)? {
112 children.push(html);
113 }
114 return Ok(Some((tag, attributes, content)));
115 }
116 }
117 }
118 return Ok(None);
119}
120
121pub fn read_token(reader: &mut CharReader<impl Read>) -> Result<Option<Html>, ParseError> {
126 match reader.peek_char(0)? {
127 None => return Ok(None),
128 Some(c) => {
129 if c == '<' {
130 if "<!--" == reader.peek_string(4)? {
131 if let Some(text) = reader.peek_until_match_inclusive("-->")? {
132 reader.consume(4)?; let text = reader.consume_string(text.len() - 4 - 3)?;
134 reader.consume(3)?; return Ok(Some(Html::Comment { text }));
136 }
137 }
138
139 if let Some((tag, attributes, content)) = parse_html_block(reader)? {
140 let mut children = vec![];
141 let mut reader = CharReader::new(content.as_bytes());
142 while let Some(html) = read_token(&mut reader)? {
143 children.push(html);
144 }
145 return Ok(Some(Html::Element {
146 tag,
147 attributes,
148 children,
149 }));
150 }
151
152 reader.consume(1)?;
154 let mut text = "<".to_string();
155 text.push_str(&reader.consume_until_exclusive(|c| c == '<')?);
156 return Ok(Some(Html::Text { text }));
157 }
158
159 let text = reader.consume_until_exclusive(|c| c == '<')?;
160 return Ok(Some(Html::Text { text }));
161 }
162 }
163}
164
/// A node produced by the HTML parser.
#[derive(Clone, Debug, PartialEq)]
pub enum Html {
    /// A comment; `text` holds what was between `<!--` and `-->`.
    Comment { text: String },
    /// A run of plain text.
    Text { text: String },
    /// An element with its tag name, attribute map and parsed child nodes.
    Element {
        tag: String,
        attributes: HashMap<String, String>,
        children: Vec<Html>,
    },
}
179
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use crate::html::to_attributes;

    use super::*;

    /// Elements with attributes, nested children and text between siblings.
    #[test]
    fn test_html() {
        let input = r#"<a href="test.com"><i class="fa-solid fa-rss"></i>Test</a>
<button disabled></button>"#;
        let expected = vec![
            Html::Element {
                tag: "a".into(),
                attributes: to_attributes([("href", "test.com")]),
                children: vec![
                    Html::Element {
                        tag: "i".into(),
                        attributes: to_attributes([("class", "fa-solid fa-rss")]),
                        children: vec![],
                    },
                    Html::Text {
                        text: "Test".into(),
                    },
                ],
            },
            Html::Text { text: "\n".into() },
            Html::Element {
                tag: "button".into(),
                attributes: to_attributes([("disabled", "")]),
                children: vec![],
            },
        ];

        let reader: Box<dyn Read> = Box::new(Cursor::new(input));
        let tokens = parse_html(reader).unwrap();
        assert_eq!(expected, tokens);

        // Markdown-looking text inside an element must be kept verbatim as text.
        let input = r#"<div>
<a href="link.com">[other](other.com)</a>
</div>"#;
        let expected = vec![Html::Element {
            tag: "div".into(),
            attributes: HashMap::new(),
            children: vec![
                Html::Text { text: "\n".into() },
                Html::Element {
                    tag: "a".into(),
                    attributes: to_attributes([("href", "link.com")]),
                    children: vec![Html::Text {
                        text: "[other](other.com)".into(),
                    }],
                },
                Html::Text { text: "\n".into() },
            ],
        }];
        let reader: Box<dyn Read> = Box::new(Cursor::new(input));
        let tokens = parse_html(reader).unwrap();
        assert_eq!(expected, tokens);
    }

    /// Angle brackets that never form a closed element stay part of one text node.
    #[test]
    fn test_text_looks_like_html() {
        let input = r#"<Lots of people say Rust > c++. even though it might be
< then c++. Who knows?
<nonclosing>
This should be text
"#;
        let expected = vec![Html::Text {
            text: "<Lots of people say Rust > c++. even though it might be
< then c++. Who knows?
<nonclosing>
This should be text
"
            .into(),
        }];

        let reader: Box<dyn Read> = Box::new(Cursor::new(input));
        let tokens = parse_html(reader).unwrap();
        assert_eq!(expected, tokens);
    }
}