1use anyhow::Error;
2use pest::Parser;
3use pest::iterators::Pairs;
4
5use crate::document::{Attr, Document, Element, NamedNodeMap};
6use crate::document_type::DocumentType;
7
/// Parser type whose implementation is derived by pest from `grammar.pest`.
///
/// Call it through the `pest::Parser` trait, e.g.
/// `LitheParser::parse(Rule::document, input)`.
#[derive(Parser)]
#[grammar = "grammar.pest"]
pub struct LitheParser;
11
12pub fn parse(s: &str) -> Result<Document, Error> {
13 let mut result = LitheParser::parse(Rule::document, s)?;
14
15 let doc = build(&mut result);
16 Ok(doc)
17}
18
19fn build<'a>(pairs: &mut Pairs<'a, Rule>) -> Document<'a> {
81 let mut doc = Document::new();
82
83 #[allow(clippy::useless_conversion)]
84 for pair in pairs.into_iter() {
85 let rule = pair.as_rule();
86 let inner = pair.into_inner();
87 match rule {
88 Rule::EOI => {
89 return doc;
90 }
91 Rule::doctype => {
92 for i in inner {
93 if i.as_rule() == Rule::doctype_value {
94 let (spec, name) = match i.as_span().as_str() {
96 "html" => ("html", "html"),
97 "5" => ("html", "5"),
98 _ => ("", ""),
99 };
100 let doctype = DocumentType::new(spec, name);
101 doc.r#type = Some(doctype);
102 doc.children = build_element(pairs, 0);
104 break;
105 }
106 }
107 return doc;
108 }
109 _ => {}
110 }
111 }
112 doc
113}
114
115fn build_attributes<'a>(pairs: &mut Pairs<'a, Rule>) -> Vec<Attr<'a>> {
116 let mut attributes: NamedNodeMap = vec![];
117
118 for pair in pairs {
119 let rule = pair.as_rule();
120 let mut inner = pair.into_inner();
121 match rule {
122 Rule::link_attribute => {
123 while let Some(i) = inner.next() {
126 let name = i.as_span().as_str();
127 let value =
128 inner.next().map_or("", |a| a.as_span().as_str());
129 attributes.push(Attr { name, value });
130 }
131 }
132 _ => {
133 let mut i = inner.take(2);
135 let name = if let Some(a) = i.next() {
136 a.as_span().as_str()
137 } else {
138 break;
139 };
140 let value = i.next().map_or("", |a| a.as_span().as_str());
141 attributes.push(Attr { name, value });
142 }
143 }
144 }
145 attributes
146}
147
148fn build_element<'a>(
149 pairs: &mut Pairs<'a, Rule>,
150 level: usize,
151) -> Vec<Element<'a>> {
152 let mut result = vec![];
153 for pair in pairs {
154 let rule = pair.as_rule();
155
156 match rule {
157 Rule::EOI => {
158 return result;
159 }
160 Rule::indent => {
161 }
166 Rule::comment => {
167 let element = Element {
168 name: "".to_string(),
169 children: vec![],
170 attributes: vec![],
171 };
172 result.push(element);
173 }
174 Rule::html | Rule::head | Rule::body => {
175 let name = format!("{:?}", rule);
177 let mut element = Element {
178 name,
179 children: vec![],
180 attributes: vec![],
181 };
182 let mut inner = pair.into_inner();
183 element.attributes = build_attributes(&mut inner);
184 element.children = build_element(&mut inner, level);
185 result.push(element);
186 }
187 Rule::link => {
188 let mut element = Element {
190 name: "link".to_string(),
191 children: vec![],
192 attributes: vec![],
193 };
194 let mut inner = pair.into_inner();
195 element.attributes = build_attributes(&mut inner);
196 result.push(element);
197 }
198 _ => {} }
200 }
201 result
202}
203
// Unit tests for individual grammar rules and for the `parse` entry point.
#[cfg(test)]
mod test {
    use super::*;

    // Parses `$input` starting at `$rule` and asserts that the first pair
    // produced is of that same rule. Panics (via `unwrap`) if parsing fails
    // or yields no pair.
    macro_rules! assert_rule {
        ($rule:expr, $input:expr) => {
            let result = LitheParser::parse($rule, $input)
                .unwrap()
                .peek()
                .unwrap()
                .as_rule();
            assert_eq!($rule, result);
        };
    }

    // Lines starting with `/` are accepted by the `code_comment` rule.
    #[test]
    fn test_code_comment() {
        let comments = vec![
            "/ foo bar baz qux quux",
            "/foo bar baz qux quux",
            "/ foo bar baz qux quux",
        ];
        for c in comments.iter() {
            assert_rule!(Rule::code_comment, c);
        }
    }

    // Lines starting with `/!` are accepted by the `html_comment` rule.
    #[test]
    fn test_html_comment() {
        let comments = vec![
            "/! foo bar baz qux quux",
            "/!foo bar baz qux quux",
            "/! foo bar baz qux quux",
        ];
        for c in comments.iter() {
            assert_rule!(Rule::html_comment, c);
        }
    }

    // The `doctype` rule accepts several values, including ones separated
    // from the keyword by newlines.
    #[test]
    fn test_doctype() {
        let doctypes = vec![
            "doctype xml",
            "doctype xml ISO-8859-1",
            "doctype html",
            "doctype 5",
            "doctype\n1.1",
            "doctype\n\n\n strict",
        ];
        for d in doctypes.iter() {
            assert_rule!(Rule::doctype, d);
        }
    }

    // `parse` succeeds on comments and on the listed doctype values, and
    // fails on an unknown doctype value.
    #[test]
    fn test_parse() {
        assert!(parse("/ Foo\n").is_ok());
        assert!(parse("/! Bar").is_ok());

        assert!(parse("doctype xml").is_ok());
        assert!(parse("doctype xml").is_ok());
        assert!(parse("doctype xml ISO-8859-1").is_ok());

        assert!(parse("doctype html").is_ok());
        assert!(parse("doctype 5").is_ok());
        assert!(parse("doctype 1.1").is_ok());
        assert!(parse("doctype strict").is_ok());
        assert!(parse("doctype frameset").is_ok());
        assert!(parse("doctype mobile").is_ok());
        assert!(parse("doctype basic").is_ok());
        assert!(parse("doctype transitional").is_ok());

        assert!(parse("doctype html").is_ok());
        assert!(parse("doctype 5").is_ok());
        assert!(parse("doctype strict").is_ok());
        assert!(parse("doctype frameset").is_ok());
        assert!(parse("doctype transitional").is_ok());

        assert!(parse("doctype unknown").is_err());
    }

    // A document consisting of a doctype followed only by comments has its
    // type set and no child elements.
    #[test]
    fn test_parse_empty_doc() {
        let doc = parse(
            r#"doctype html
/ Comment
/! Das ist ein Test
"#,
        )
        .unwrap();

        let doctype = doc.r#type.unwrap();
        assert_eq!("html".to_string(), doctype.name);
        assert_eq!("", doctype.public_id);
        assert_eq!("", doctype.system_id);

        assert!(doc.children.is_empty());
    }

    // A bare `html` tag yields one element without children or attributes.
    #[test]
    fn test_parse_html_tag() {
        let doc = parse(
            r#"doctype html
html
"#,
        )
        .unwrap();

        let html = &doc.children[0];
        assert_eq!("html", html.name);

        assert!(html.children.is_empty());
        assert!(html.attributes.is_empty());
    }

    // An attribute on the `html` tag is exposed with its name and its value
    // (without the surrounding quotes).
    #[test]
    fn test_parse_html_tag_with_attributes() {
        let doc = parse(
            r#"doctype html
html lang="en"
"#,
        )
        .unwrap();

        let html = &doc.children[0];
        assert_eq!("html", html.name);

        assert!(html.children.is_empty());

        let attr = &html.attributes[0];
        assert_eq!("lang", attr.name);
        assert_eq!("en", attr.value);
    }

    // A full document is compared against a hand-built expected tree:
    // `html` containing `head` (with one `link`) and `body`.
    #[test]
    fn test_parse_entire_doc() {
        let doc = parse(
            r#"doctype html
html
 head
 link rel="stylesheet" href="style.css"
 body
"#,
        )
        .unwrap();

        let doctype = DocumentType::new("html", "html");
        let expected = Document {
            r#type: Some(doctype),
            children: vec![Element {
                name: "html".to_string(),
                attributes: vec![],
                children: vec![
                    Element {
                        name: "head".to_string(),
                        attributes: vec![],
                        children: vec![Element {
                            name: "link".to_string(),
                            attributes: vec![
                                Attr {
                                    name: "rel",
                                    value: "stylesheet",
                                },
                                Attr {
                                    name: "href",
                                    value: "style.css",
                                },
                            ],
                            children: vec![],
                        }],
                    },
                    Element {
                        name: "body".to_string(),
                        attributes: vec![],
                        children: vec![],
                    },
                ],
            }],
        };
        assert_eq!(expected, doc);
    }
}