1#[macro_use]
2extern crate pest_derive;
3use std::io::Cursor;
4use std::fmt::Debug;
5pub use pest::RuleType;
6use pest::Parser;
7use std::io::Write;
8
9#[derive(Debug)]
10pub enum Error<E> {
11 Parser(Box<pest::error::Error<Rule>>),
12 Include(E),
13 Io(std::io::Error),
14}
15
16impl<E: Debug> From<pest::error::Error<Rule>> for Error<E> {
17 fn from(e : pest::error::Error<Rule>) -> Self {
18 Error::Parser(Box::new(e))
19 }
20}
21
22impl<E: Debug> From<std::io::Error> for Error<E> {
23 fn from(e : std::io::Error) -> Self {
24 Error::Io(e)
25 }
26}
27
28#[cfg(target_arch = "wasm32")]
29use wasm_bindgen::prelude::*;
30
31#[derive(Parser)]
32#[grammar = "pug.pest"]
33pub struct PugParser;
34
35
/// A node of the parsed template tree.
///
/// Regular nodes describe an HTML element. Nodes whose `element` starts with `:`
/// (`":document"`, `":text"`, `":doctype"`, `":include"`) are special and are
/// handled individually when the tree is expanded or rendered.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct Ast {
    /// Child nodes, in source order.
    pub children: Vec<Ast>,
    /// For regular elements the value of the `id` attribute; for special nodes
    /// the payload (text content, doctype name or include target).
    pub id: Option<String>,
    /// Tag name, or one of the special `:`-prefixed markers.
    pub element: String,
    /// Class names; joined with single spaces when rendered.
    pub class: Vec<String>,
    /// Remaining attributes as ready-to-write `key=value` strings.
    pub attrs: Vec<String>,
}
44
45impl Ast {
46 pub fn special<A: Into<String>, B: Into<String>>(element: A, id: B) -> Self {
47 Self {
48 element: element.into(),
49 id: Some(id.into()),
50 .. Default::default()
51 }
52 }
53
54 pub fn expand<E, F>(mut self, mut inc: F) -> Result<Self, Error<E>>
55 where E: Debug,
56 F: Clone + FnMut(String) -> Result<Ast, E>,
57 {
58 match self.element.as_ref() {
59 ":include" => {
60 return inc(self.id.as_ref().unwrap().to_string()).map_err(Error::Include)?.expand(inc.clone());
61 }
62 _ => {
63 for child in std::mem::take(&mut self.children) {
64 self.children.push(child.expand(inc.clone())?);
65 };
66 Ok(self)
67 }
68 }
69 }
70
71 pub fn to_str(&self) -> Result<String, std::io::Error>{
72 let mut buffer = Vec::new();
73 let mut writer = Cursor::new(&mut buffer);
74 self.to_html(&mut writer)?;
75 Ok(String::from_utf8_lossy(&buffer).into())
76 }
77
78 pub fn to_html<W>(&self, w: &mut W) -> Result<(), std::io::Error>
79 where W: Write,
80
81 {
82 self.to_html_i(w, &mut false)
83 }
84
85 fn to_html_i<W>(&self, w: &mut W, previous_was_text: &mut bool) -> Result<(), std::io::Error>
86 where W: Write,
87
88 {
89 match self.element.as_ref() {
90 ":include" => {
91 panic!("include cannot be written to html. forgot to call expand?");
92 }
93 ":document" => {
94 *previous_was_text = false;
95 for child in &self.children {
96 child.to_html_i(w, previous_was_text)?;
97 }
98 return Ok(());
99 }
100 ":text" => {
101 if *previous_was_text {
102 w.write_all(b"\n")?;
103 }
104 *previous_was_text = true;
105 w.write_all(self.id.as_ref().unwrap().as_bytes())?;
106 return Ok(());
107 }
108 ":doctype" => {
109 *previous_was_text = false;
110 w.write_all(b"<!DOCTYPE ")?;
111 w.write_all(self.id.as_ref().unwrap().as_bytes())?;
112 w.write_all(b">")?;
113 return Ok(());
114 }
115 _ => {
116 *previous_was_text = false;
117 w.write_all(b"<")?;
118 w.write_all(self.element.as_bytes())?;
119 if !self.class.is_empty() {
120 w.write_all(b" class=\"")?;
121 w.write_all(self.class.join(" ").as_bytes())?;
122 w.write_all(b"\"")?;
123 }
124 if let Some(ref id) = self.id {
125 w.write_all(b" id=\"")?;
126 w.write_all(id.as_bytes())?;
127 w.write_all(b"\"")?;
128 }
129 for attr in &self.attrs {
130 w.write_all(b" ")?;
131 w.write_all(attr.as_bytes())?;
132 }
133 match self.element.as_ref() {
134 "area"|"base"|"br"|"col"|"command"|"embed"|"hr"|"img"|"input"|"keygen"|"link"|"meta"|"param"|"source"|"track"|"wbr" => {
135 w.write_all(b">")?;
136 return Ok(());
137 },
138 _ => (),
139 };
140 w.write_all(b">")?;
141 }
142 }
143
144 for child in &self.children {
145 child.to_html_i(w, previous_was_text)?;
146 }
147
148 w.write_all(b"</")?;
149 w.write_all(self.element.as_bytes())?;
150 w.write_all(b">")?;
151
152 Ok(())
153 }
154}
155
156fn parse_impl(file: &str) -> Result<Ast, Box<pest::error::Error<Rule>>> {
157 let mut file = PugParser::parse(Rule::file, file)?;
158
159 let mut comment = None;
160 let mut indent = 0;
161
162 let mut cur = Ast { element: ":document".into(), ..Default::default() };
163 let mut stack : Vec<(usize, Ast)> = Vec::new();
164
165 for decl in file.next().unwrap().into_inner() {
166 match decl.as_rule() {
167 Rule::indent => {
168 indent = decl.as_str().len();
169 if let Some(ind) = comment {
170 if indent > ind {
171 continue;
172 } else {
173 comment = None;
174 }
175 }
176
177 while let Some((ind, mut ast)) = stack.pop() {
178 if ind >= indent {
179 ast.children.push(std::mem::take(&mut cur));
180 cur = ast;
181 } else {
182 stack.push((ind,ast));
183 break;
184 }
185 }
186 }
187 Rule::include => {
188 cur.children.push(Ast::special(":include", decl.into_inner().as_str()));
189 }
190 Rule::doctype => {
191 cur.children.push(Ast::special(":doctype", decl.into_inner().as_str()));
192 }
193 Rule::tag => {
194 if comment.is_some() {
195 continue;
196 }
197
198 let parent = std::mem::take(&mut cur);
199 stack.push((indent, parent));
200
201 cur.element = "div".into();
202 for e in decl.into_inner() {
203 match e.as_rule() {
204 Rule::element => {
205 cur.element = e.as_str().to_string();
206 }
207 Rule::class => {
208 cur.class.push(e.into_inner().next().unwrap().as_str().to_string());
209 }
210 Rule::id => {
211 cur.id = Some(e.into_inner().next().unwrap().as_str().to_string());
212 }
213 Rule::attrs => {
214 for e in e.into_inner() {
215 let mut e = e.into_inner();
216 let key = e.next().unwrap().as_str();
217 let value = e.next().unwrap();
218 if key == "id" {
219 cur.id = Some(
220 value.into_inner().next().unwrap().as_str().to_string(),
221 );
222 } else if key == "class" {
223 cur.class.push(
224 value.into_inner().next().unwrap().as_str().to_string(),
225 );
226 } else {
227 cur.attrs.push(format!("{}={}", key, value.as_str()));
228 }
229 }
230 }
231 _ => unreachable!(),
232 }
233 }
234
235 }
236 Rule::comment => {
237 if comment.is_some() {
238 continue;
239 }
240 comment = Some(indent);
241 }
242 Rule::text => {
243 if comment.is_some() {
244 continue;
245 }
246 let text = decl.as_str().to_string();
247 cur.children.push(Ast::special(":text", text));
248 }
249 Rule::EOI => {
250 for (_, mut ast) in stack.drain(..).rev() {
251 ast.children.push(std::mem::take(&mut cur));
252 cur = ast;
253 }
254 }
255 any => panic!("parser bug. did not expect: {:?}", any),
256 }
257 }
258
259 Ok(cur)
260}
261
262pub fn parse<S: Into<String>>(file: S) -> Result<Ast, Box<pest::error::Error<Rule>>> {
264 let mut file = file.into();
265 file.push('\n');
266 parse_impl(&file)
267}
268
/// Attribute names may contain `-`, `:` and `.`, and attributes may be
/// separated by a comma or a newline.
#[test]
pub fn valid_identitifer_characters() {
    let source = r#"a(a="b",a-:.b.="c"
x="y")"#;
    let ast = parse(source).unwrap();

    let mut html = Vec::new();
    ast.to_html(&mut html).unwrap();
    assert_eq!(html, br#"<a a="b" a-:.b.="c" x="y"></a>"#);
}
278
/// Blank lines between siblings must not close the enclosing element.
#[test]
pub fn emptyline() {
    let source = r#"
a
 b

 c

"#;
    let ast = parse(source).unwrap();

    let mut html = Vec::new();
    ast.to_html(&mut html).unwrap();
    assert_eq!(html, br#"<a><b></b><c></c></a>"#);
}
293
/// An `id=` attribute replaces the `#id` shorthand, while a `class=` attribute
/// is appended to the `.class` shorthand.
#[test]
pub fn dupclass() {
    let ast = parse(r#"a#x.b(id="v" class="c")"#).unwrap();

    let mut html = Vec::new();
    ast.to_html(&mut html).unwrap();
    assert_eq!(
        String::from_utf8_lossy(&html),
        r#"<a class="b c" id="v"></a>"#
    );
}
303
/// Consecutive piped text lines are joined with newlines, and whitespace after
/// the `| ` marker is kept verbatim.
///
/// NOTE(review): the spacing inside both raw strings is significant. It was
/// reconstructed so that each line's extra indentation after `| ` matches the
/// expected output. Confirm against the grammar's text rule.
#[test]
pub fn preserve_newline_in_multiline_text() {
    let mut html = Vec::new();
    parse(
        r#"pre
    | The pipe always goes at the beginning of its own line,
    | not counting indentation.
    |   lol look at me
    |   getting all getho indent
    |     watt"#,
    )
    .unwrap()
    .to_html(&mut html)
    .unwrap();

    assert_eq!(
        String::from_utf8_lossy(&html),
        r#"<pre>The pipe always goes at the beginning of its own line,
not counting indentation.
  lol look at me
  getting all getho indent
    watt</pre>"#
    );
}
326
/// The same document must render identically whether the input ends right
/// after the last line, with a whitespace-only line, or with several blank lines.
#[test]
pub fn eoi() {
    const EXPECTED: &str = r#"<body class="herp derp" id="blorp"><a href="google.de"></a></body><derp><yorlo>jaja</yorlo></derp>"#;

    let render = |source: &str| {
        let mut html = Vec::new();
        parse(source).unwrap().to_html(&mut html).unwrap();
        String::from_utf8_lossy(&html).into_owned()
    };

    // Input ends directly after the last line.
    let no_trailing_newline = r#"body#blorp.herp.derp
 a(href="google.de")
derp
 yorlo jaja"#;
    assert_eq!(render(no_trailing_newline), EXPECTED);

    // Input ends with a whitespace-only line.
    let trailing_whitespace = r#"body#blorp.herp.derp
 a(href="google.de")
derp
 yorlo jaja
 "#;
    assert_eq!(render(trailing_whitespace), EXPECTED);

    // Input ends with several blank lines.
    let trailing_blank_lines = r#"body#blorp.herp.derp
 a(href="google.de")
derp
 yorlo jaja



"#;
    assert_eq!(render(trailing_blank_lines), EXPECTED);
}
372
/// A `doctype` line is rendered as a `<!DOCTYPE …>` declaration ahead of the markup.
#[test]
pub fn doctype() {
    let source = r#"doctype html
html
 body
"#;
    let ast = parse(source).unwrap();

    let mut html = Vec::new();
    ast.to_html(&mut html).unwrap();
    assert_eq!(
        String::from_utf8_lossy(&html),
        r#"<!DOCTYPE html><html><body></body></html>"#
    );
}
387
/// Void elements such as `meta` and `link` are written without a closing tag.
///
/// The fixture's indentation encodes the nesting the assertion expects:
/// `head` and `body` inside `html`, and the remaining tags one level deeper.
#[test]
pub fn voidelements() {
    let mut html = Vec::new();
    parse(
        r#"
doctype html
html
    head(lang="en")
        meta(charset="utf-8")
        title n1's personal site
        link(rel="stylesheet", href="normalize.css")
        link(rel="stylesheet", href="style.css")

    body
        .container
"#,
    )
    .unwrap()
    .to_html(&mut html)
    .unwrap();

    assert_eq!(
        String::from_utf8_lossy(&html),
        r#"<!DOCTYPE html><html><head lang="en"><meta charset="utf-8"><title>n1's personal site</title><link rel="stylesheet" href="normalize.css"><link rel="stylesheet" href="style.css"></head><body><div class="container"></div></body></html>"#
    );
}
411
412
/// An `include` line parses into a single `:include` child of the document.
#[test]
pub fn include_p() {
    let document = parse("include ./a").unwrap();

    assert_eq!(document.children.len(), 1);
    assert_eq!(document.children[0].element, ":include");
}
425
426
/// `expand` resolves includes recursively: `/a/1` includes `a`, which finally
/// resolves to a plain text node.
#[test]
pub fn include() {
    let resolve = |path: String| {
        if path == "/a/1" {
            parse("include a")
        } else {
            parse("| tomato")
        }
    };

    let source = r#"
doctype html
kebab
 include /a/1
"#;
    let expanded = parse(source).unwrap().expand(resolve).unwrap();

    let mut html = Vec::new();
    expanded.to_html(&mut html).unwrap();
    assert_eq!(
        String::from_utf8_lossy(&html),
        r#"<!DOCTYPE html><kebab>tomato</kebab>"#
    );
}