1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
//! HTML Parser
//!
//! Fork from:
//! https://github.com/mathiversen/html-parser/blob/v0.6.3/src/grammar/rules.pest
// Entry rule: the whole input, from SOI to EOI, as leading comments,
// an optional doctype, and then any number of nodes.
item = _{ SOI ~ comment* ~ doctype? ~ node* ~ EOI }
// Other
// Fallback: takes one ANY character, but only at a position where none of
// comment, node_element, server or text can match.
other = {
    !(comment | node_element | server | text)
    ~ ANY
}
// Implicit whitespace (space, tab, or a line break). pest skips this rule
// between the terms of sequences and repetitions in non-atomic rules.
WHITESPACE = { NEWLINE | "\t" | " " }
/// Code
/// A `<code>` element captured as one unit: the opening tag, everything up
/// to the first `</code>`, and the closing tag itself.
// The delimiters must be non-empty literals. With "" on both sides this rule
// matched the empty string without consuming input, so `node*` (which tries
// `code` first) could never make progress.
code = ${ "<code>" ~ (!"</code>" ~ ANY)* ~ "</code>" }
/// DOCTYPE
/// `<!`, the doctype keyword, any attributes, then `>`.
doctype = {
    chevron_left_bang
    ~ doctype_name
    ~ attr*
    ~ chevron_right_normal
}
// The keyword itself, matched case-insensitively.
doctype_name = @{ ^"doctype" }
/// SERVER CODE
/// A block delimited by `<%` and `%>`.
server = ${
    chevron_left_server
    ~ server_code
    ~ chevron_right_server
}
// Body of the block: every character before the next `%>` (may be empty).
server_code = { (!chevron_right_server ~ ANY)* }
// Delimiters
chevron_left_server  = { "<%" }
chevron_right_server = { "%>" }
/// NODES
/// One piece of document content. Alternatives are tried in the order listed.
node = _{
    code
    | server
    | comment
    | node_element
    | text
    | other
}
// Conditional comments are tried before plain ones.
comment = { comment_if | comment_normal }
// A run of one or more characters, stopping before `<` or a comment opener.
text = ${ (!(chevron_left_normal | comment_tag_start) ~ ANY)+ }
// NOTE: Should we be able to write < in text? ^^
// Every element form, tried in the order listed.
node_element = {
    el_void
    | el_void_xml
    | el_process_instruct
    | javascript_text
    | style_text
    | el_raw_text
    | el_normal
    | el_dangling
}
/// COMMENTS
/// Plain comment: the opener, everything before the first closer, the closer.
comment_normal = _{
    comment_tag_start
    ~ (!comment_tag_end ~ ANY)*
    ~ comment_tag_end
}
// Opener: `<!` followed by `--`.
comment_tag_start = _{ chevron_left_bang ~ "--" }
// Closer: `--` followed by `>`.
comment_tag_end = _{ "--" ~ chevron_right_normal }
/// Compatibility with old IE browsers... This is not necessary for newer browsers
// Conditional comment: runs from the `[if` opener to the `[endif]` closer.
comment_if = _{
    comment_if_start
    ~ (!comment_if_end ~ ANY)*
    ~ comment_if_end
}
// A comment opener followed by `[` and a case-insensitive "if".
comment_if_start = _{ comment_tag_start ~ "[" ~ ^"if" }
// `<!`, `[`, case-insensitive "endif", `]`, then the normal comment closer.
comment_if_end = _{ chevron_left_bang ~ "[" ~ ^"endif" ~ "]" ~ comment_tag_end }
/// ATTRIBUTES
/// An attribute is a key, optionally followed by `=` and a value.
// The unquoted form is tried first; it refuses to start at a quote, so quoted
// values fall through to attr_quoted.
attr = { attr_key ~ (equal ~ (attr_non_quoted | attr_quoted))? }
// PUSH stores the opening quote on the pest stack so that attr_value (PEEK)
// and end_attr_quoted (POP) look for that same quote character.
attr_quoted = { PUSH(quote) ~ attr_value ~ end_attr_quoted }
// POP matches the text stored by the PUSH in attr_quoted and removes it from
// the stack.
end_attr_quoted = { POP }
// Unquoted value: must not begin with a quote, then runs up to the next
// whitespace or right chevron. The repetition may match nothing.
attr_non_quoted = @{ !quote ~ (!(WHITESPACE | chevron_right) ~ ANY)* }
// Key: must not start on whitespace; an optional single prefix character
// (`:`, `@`, `#` or `.`), then an ASCII letter, then any text_chars.
attr_key = ${ !WHITESPACE ~ (":" | "@" | "#" | ".")? ~ ASCII_ALPHA ~ text_chars* }
// Quoted value body: optional leading whitespace, then every character up to
// the next occurrence of the text on top of the stack (the opening quote).
// NOTE(review): the middle loop already consumes whitespace, so the trailing
// WHITESPACE* looks like it can never match anything — confirm before removing.
attr_value = ${ WHITESPACE* ~ (!PEEK ~ ANY)* ~ WHITESPACE* }
/// ELEMENTS
/// Tag name: one ASCII letter followed by any number of text_chars.
el_name = @{
    ASCII_ALPHA ~ text_chars*
}
/// Void element aka self-closing element
/// Ex: `<hr>`
// Names are matched case-insensitively. Each name appears exactly once: the
// list used to end with a second ^"meta", an alternative that could never be
// reached because the first one always wins in an ordered choice.
el_void_name_html = @{
    ^"area"
    | ^"base"
    | ^"br"
    | ^"col"
    | ^"command"
    | ^"embed"
    | ^"hr"
    | ^"img"
    | ^"input"
    | ^"keygen"
    | ^"link"
    | ^"meta"
    | ^"param"
    | ^"source"
    | ^"track"
    | ^"wbr"
}
// NOTE: This should not have to be a rule, but people don't know what void elements are...
// SVG shape names that are accepted wherever a void element name is expected
// (case-insensitive).
el_void_name_svg = @{ ^"path" | ^"polygon" | ^"rect" | ^"circle" }
// A void name has to be the complete tag name. Without the `!text_chars`
// guard the ordered choice accepted a mere prefix, so `<colgroup>` was read
// as void `col` with an attribute `group`, and `<metadata>` as `meta` + `data`.
el_void_name = @{ (el_void_name_html | el_void_name_svg) ~ !text_chars }
// Void element: `<name attrs>` or `<name attrs/>` for a known void name.
el_void = _{ chevron_left_normal ~ el_void_name ~ attr* ~ (chevron_right_normal | chevron_right_closed) }
// Any element name written in self-closing form: `<name attrs/>`.
el_void_xml = _{ chevron_left_normal ~ el_name ~ attr* ~ chevron_right_closed }
/// Open elements are the default elements: they can take children
/// and have both a start tag and an end tag
/// Ex: `<html lang="en"></html>`
// Children are parsed as nodes until an end tag is seen.
el_normal = _{ el_normal_start ~ (!el_normal_end ~ node)* ~ el_normal_end }
// The tag name is matched directly instead of through PUSH(el_name). The
// pushed names were never consumed: the only POP/PEEK in this grammar belong
// to the quoted-attribute rules, which always push their own quote first.
// Every element therefore left two dead entries on the parser stack. Matching
// and the produced el_name pairs are unchanged.
el_normal_start = _{ chevron_left_normal ~ el_name ~ attr* ~ chevron_right_normal }
// Any closing tag ends the element; its name is not required to repeat the
// name used in the start tag.
el_normal_end = _{ chevron_left_closed ~ el_name ~ chevron_right_normal }
/// Raw text elements are elements with text/script content that
/// might interfere with the normal html syntax
// Names are case-insensitive literals.
el_raw_text_name = {
    ^"title"
    | ^"textarea"
}
// Raw content: every character before the next raw-text end tag; it is not
// parsed as nodes.
el_raw_text_content = ${ (!el_raw_text_end ~ ANY)* }
el_raw_text = _{ el_raw_text_start ~ el_raw_text_content ~ el_raw_text_end }
// The name is matched directly instead of through PUSH(el_raw_text_name):
// nothing ever popped or peeked those entries, so each raw-text element (and
// each look-ahead for its end tag) only added dead entries to the parser
// stack. Matching and the produced pairs are unchanged.
el_raw_text_start = _{ chevron_left_normal ~ el_raw_text_name ~ attr* ~ chevron_right_normal }
el_raw_text_end = { chevron_left_closed ~ el_raw_text_name ~ chevron_right_normal }
// Script body: every character before the next script end tag, kept as raw
// text rather than parsed as nodes.
inline_javascript = ${ (!javascript_end ~ ANY)* }
// A whole script element: start tag, raw body, end tag.
javascript_text = _{ javascript_start ~ inline_javascript ~ javascript_end }
// Case-insensitive, so the start and end tag may differ in letter case.
el_javascript_name = @{ ^"script" }
// The name is matched directly instead of through PUSH(el_javascript_name):
// the pushed value was never popped or peeked, so it only accumulated on the
// parser stack. Matching and the produced pairs are unchanged.
javascript_start = _{ chevron_left_normal ~ el_javascript_name ~ attr* ~ chevron_right_normal }
javascript_end = { chevron_left_closed ~ el_javascript_name ~ chevron_right_normal }
// Style body: every character before the next style end tag, kept as raw
// text rather than parsed as nodes.
inline_style = ${ (!style_end ~ ANY)* }
// Case-insensitive, so the start and end tag may differ in letter case.
el_style_name = @{ ^"style" }
// A whole style element: start tag, raw body, end tag.
style_text = _{ style_start ~ inline_style ~ style_end }
// The name is matched directly instead of through PUSH(el_style_name): the
// pushed value was never popped or peeked, so it only accumulated on the
// parser stack. Matching and the produced pairs are unchanged.
style_start = _{ chevron_left_normal ~ el_style_name ~ attr* ~ chevron_right_normal }
style_end = { chevron_left_closed ~ el_style_name ~ chevron_right_normal }
/// XML processing instruction
/// Ex: `<?xml version="1.0" ?>`
// `<?`, an optional name, any attributes, then `?>`.
el_process_instruct = {
    chevron_left_question
    ~ el_name?
    ~ attr*
    ~ chevron_right_question
}
/// Catch dangling elements
/// Ex: `</div>`
// A closing tag on its own. It is the last alternative of node_element, so it
// only applies when no other element form matched at this position.
el_dangling = {
    chevron_left_closed
    ~ el_name
    ~ chevron_right_normal
}
/// SYMBOLS / CHARACTERS
/// One character allowed after the first letter of a name: an ASCII letter
/// or digit, `_`, `-` or `:`.
text_chars = _{ ASCII_ALPHANUMERIC | "_" | "-" | ":" }
/// tag left
// Opening delimiters: plain, closing-tag, bang (doctype / comment) and
// processing-instruction forms.
chevron_left_normal   = { "<" }
chevron_left_closed   = { "</" }
chevron_left_bang     = { "<!" }
chevron_left_question = { "<?" }
/// tag right
// Closing delimiters: plain, self-closing and processing-instruction forms.
chevron_right_normal   = { ">" }
chevron_right_closed   = { "/>" }
chevron_right_question = { "?>" }
// Any one of the three closing delimiters.
chevron_right = _{ chevron_right_normal | chevron_right_closed | chevron_right_question }
/// other
// Separator between an attribute key and its value.
equal = @{ "=" }
// Either quote character that may delimit an attribute value.
quote = @{ "'" | "\"" }